示例#1
0
def test_location_geo_url():
    """The location algorithm should understand a u-geo geo: URI.

    http://microformats.org/wiki/microformats2#h-card
    https://tools.ietf.org/html/rfc5870
    """
    doc = {
        'items': [{
            'type': ['h-entry'],
            'properties': {
                'geo': [u'geo:48.2010,16.3695,183;crs=wgs84;u=40'],
            },
        }]}
    interpreted = mf2util.interpret(doc, 'http://example.com/')
    expected = {
        'latitude': '48.2010',
        'longitude': '16.3695',
        'altitude': '183',
    }
    assert interpreted['location'] == expected

    # same URI without the optional altitude component
    doc['items'][0]['properties']['geo'] = ['geo:48.2010,16.3695']
    interpreted = mf2util.interpret(doc, 'http://example.com/')
    assert interpreted['location'] == {
        'longitude': '16.3695',
        'latitude': '48.2010',
    }
示例#2
0
def fetch_reply_contexts(reply_pairs, now, fetch_mf2_func):
    """Attach a reply-context Entry to each (entry, in-reply-to url) pair.

    Known context entries are loaded from the database in one query; only
    urls we don't already have are fetched and interpreted over the network.

    Args:
        reply_pairs: iterable of (Entry, in-reply-to url) tuples.
        now: datetime passed through to hentry_to_entry for new contexts.
        fetch_mf2_func: callable handed to mf2util.interpret (used by its
            authorship algorithm to fetch additional pages).
    """
    old_contexts = {}
    in_reply_tos = [url for _, url in reply_pairs]
    if in_reply_tos:
        # one query for all already-stored html-feed entries, keyed by permalink
        for entry in (Entry.query
                      .join(Entry.feed)
                      .filter(Entry.permalink.in_(in_reply_tos),
                              Feed.type == 'html')):
            old_contexts[entry.permalink] = entry

    for entry, in_reply_to in reply_pairs:
        context = old_contexts.get(in_reply_to)
        if not context:
            current_app.logger.info('fetching in-reply-to: %s', in_reply_to)
            # bind before the try so the except handler can never hit an
            # unbound name if proxy_url() itself raises
            proxied_reply_url = in_reply_to
            try:
                proxied_reply_url = proxy_url(in_reply_to)
                parsed = mf2util.interpret(
                    mf2py.parse(url=proxied_reply_url), in_reply_to,
                    fetch_mf2_func=fetch_mf2_func)
                if parsed:
                    context = hentry_to_entry(parsed, None, False, now)
            except requests.exceptions.RequestException as err:
                # logger.warn is a deprecated alias; use warning()
                current_app.logger.warning(
                    '%s fetching reply context: %s for entry: %s',
                    type(err).__name__, proxied_reply_url, entry.permalink)

        if context:
            db.session.add(context)
            entry.reply_context.append(context)
示例#3
0
def convert_mf2util():
    def dates_to_string(json):
        if isinstance(json, dict):
            return {k: dates_to_string(v) for (k, v) in json.items()}
        if isinstance(json, list):
            return [dates_to_string(v) for v in json]
        if isinstance(json, datetime.date) or isinstance(json, datetime.datetime):
            return json.isoformat()
        return json

    url = request.args.get('url')
    as_feed = request.args.get('as-feed')
    op = request.args.get('op')
    if url:
        try:
            d = mf2py.parse(url=url)
            if op == 'post-type-discovery':
                entry = mf2util.find_first_entry(d, ['h-entry', 'h-event'])
                return jsonify({'type': mf2util.post_type_discovery(entry)})
                
            if as_feed == 'true' or mf2util.find_first_entry(d, ['h-feed']):
                json = mf2util.interpret_feed(d, url)
            else:
                json = mf2util.interpret(d, url)
            return jsonify(dates_to_string(json))
        except:
            current_app.logger.exception('running mf2util service')
            return jsonify({'error': str(sys.exc_info()[0])})

    return """
示例#4
0
def create_context(url):
    """Build a Context object for url.

    Tries, in order: registered 'create-context' hooks, fetching and
    interpreting the page's microformats2, and finally a bare fallback
    context carrying just the url (and <title> when the page was fetched).

    Returns a Context in all cases; fetch/parse errors are logged, not raised.
    """
    # give plugins the first shot at producing a context
    for context in hooks.fire('create-context', url):
        if context:
            return context

    context = None
    response = None
    try:
        response = util.fetch_html(url)
        response.raise_for_status()

        context = Context.query.filter_by(url=url).first()
        app.logger.debug('checked for pre-existing context for this url: %s', context)
        blob = mf2py.Parser(doc=response.text, url=url).to_dict()
        if blob:
            app.logger.debug('parsed successfully by mf2py: %s', url)
            entry = mf2util.interpret(blob, url)
            if entry:
                app.logger.debug('parsed successfully by mf2util: %s', url)
                published = entry.get('published')
                content = util.clean_foreign_html(entry.get('content', ''))
                content_plain = util.format_as_text(
                    content, link_fn=lambda a: a)

                title = entry.get('name')
                author = entry.get('author', {})

                # fall back to the fetched url when mf2 has no usable permalink
                permalink = entry.get('url')
                if not permalink or not isinstance(permalink, str):
                    permalink = url

                context = Context()
                context.url = url
                context.permalink = permalink
                context.author_name = author.get('name', '')
                context.author_url = author.get('url', '')
                context.author_image = author.get('photo')
                context.content = content
                context.content_plain = content_plain
                context.published = published
                context.title = title
    except Exception:
        # was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; Exception keeps the best-effort behavior
        app.logger.exception(
            'Could not fetch context for url %s, received response %s',
            url, response)

    if not context:
        app.logger.debug('Generating default context: %s', url)
        context = Context()
        context.url = context.permalink = url
        if response:
            # NOTE(review): no parser specified; bs4 picks the "best" installed
            # one, which can vary between environments — consider pinning.
            soup = bs4.BeautifulSoup(response.text)
            if soup.title:
                app.logger.debug('Found title: %s', soup.title.string)
                context.title = soup.title.string

    return context
示例#5
0
def test_location_top_level():
    """Location algorithm: latitude/longitude given as top-level properties."""
    interpreted = mf2util.interpret(
        load_test('location_top_level'), 'http://example.com/')
    assert interpreted['location'] == {
        'longitude': '-122.25',
        'latitude': '37.83',
    }
示例#6
0
def test_article_naive_datetime():
    """An article whose dt-published lacks a timezone parses as a naive datetime."""
    blob = load_test('article_naive_datetime')
    result = mf2util.interpret(
        blob, 'http://tantek.com/2014/120/b1/markup-people-focused-mobile-communication')
    expected_dt = datetime(2014, 4, 30, 12, 11)
    assert result['type'] == 'entry'
    assert result['name'] == 'Markup For People Focused Mobile Communication'
    assert '<h2>Action labels not app names</h2>' in result['content']
    assert result['published'] == expected_dt
    assert result['updated'] == expected_dt
示例#7
0
def test_location_geo():
    """Location algorithm: an embedded h-geo microformat."""
    interpreted = mf2util.interpret(load_test('location_h-geo'), 'http://example.com/')
    expected = {
        'latitude': '37.83',
        'longitude': '-122.25',
        'altitude': '123.0',
    }
    assert interpreted['location'] == expected
示例#8
0
def test_article_naive_datetime():
    """Naive (timezone-less) published/updated dates should round-trip intact."""
    permalink = 'http://tantek.com/2014/120/b1/markup-people-focused-mobile-communication'
    result = mf2util.interpret(load_test('article_naive_datetime'), permalink)
    assert result['type'] == 'entry'
    assert result['name'] == 'Markup For People Focused Mobile Communication'
    assert '<h2>Action labels not app names</h2>' in result['content']
    when = datetime(2014, 4, 30, 12, 11)
    assert result['published'] == when
    assert result['updated'] == when
示例#9
0
def test_location_adr():
    """Location algorithm: an embedded h-adr microformat."""
    blob = load_test('location_h-adr')
    interpreted = mf2util.interpret(blob, 'http://example.com/')
    expected = {
        'name': '17 Austerstræti Reykjavík Iceland 107',
        'street-address': '17 Austerstræti',
        'locality': 'Reykjavík',
        'country-name': 'Iceland',
        'postal-code': '107',
    }
    assert interpreted['location'] == expected
示例#10
0
def test_location_hcard():
    """Location algorithm: an embedded h-card.

    https://indieweb.org/location#How_to_determine_the_location_of_a_microformat
    """
    interpreted = mf2util.interpret(load_test('location_h-card'), 'http://example.com/')
    expected = {
        'latitude': '37.83',
        'longitude': '-122.25',
        'name': 'Timeless Coffee Roasters',
    }
    assert interpreted['location'] == expected
示例#11
0
def test_article_two_published_dates():
    """Regression test: posts with two dt-published values used to raise
    because the values were concatenated; the first one should win.
    """
    result = mf2util.interpret(
        load_test('article_two_published_dates'), 'article.html')
    assert result['type'] == 'entry'
    assert result['name'] == 'Test Article with Two Published Dates'
    published = result['published']
    assert published.replace(tzinfo=None) == datetime(2014, 4, 30, 12, 11, 00)
    assert published.utcoffset() == timedelta(hours=-8)
示例#12
0
def test_article_two_published_dates():
    """A post carrying two dt-published dates must use the first one rather
    than concatenating them (the old behavior threw an exception).
    """
    doc = load_test('article_two_published_dates')
    result = mf2util.interpret(doc, 'article.html')
    assert result['type'] == 'entry'
    assert result['name'] == 'Test Article with Two Published Dates'
    assert result['published'].utcoffset() == timedelta(hours=-8)
    assert result['published'].replace(tzinfo=None) == datetime(2014, 4, 30, 12, 11)
示例#13
0
def test_event():
    """Interpret a Homebrew Website Club h-event (from werd.io)."""
    result = mf2util.interpret(
        load_test('hwc-event'), 'http://werd.io/2014/homebrew-website-club-4')

    assert result['type'] == 'event'
    assert result['name'] == 'Homebrew Website Club'
    assert 'Are you building your own website?' in result['content']
    # both start and end are UTC-offset-zero aware datetimes
    for key, naive in (('start', datetime(2014, 5, 7, 18, 30)),
                       ('end', datetime(2014, 5, 7, 19, 30))):
        assert result[key].replace(tzinfo=None) == naive
        assert result[key].utcoffset() == timedelta(hours=0)
示例#14
0
def extract_mf2_context(context, doc, url):
    """Populate `context` from Microformats2 markup found in `doc`."""
    cached_mf2 = {}

    # memoized fetcher handed to mf2util's authorship algorithm
    def fetch_mf2(fetch_url):
        if fetch_url not in cached_mf2:
            cached_mf2[fetch_url] = mf2py.parse(url=fetch_url)
        return cached_mf2[fetch_url]

    blob = mf2py.parse(doc=doc, url=url)
    cached_mf2[url] = blob

    if blob:
        current_app.logger.debug('parsed successfully by mf2py: %s', url)
        entry = mf2util.interpret(blob, url, fetch_mf2_func=fetch_mf2)
        if entry:
            current_app.logger.debug(
                'parsed successfully by mf2util: %s', url)
            author = entry.get('author', {})
            content = util.clean_foreign_html(entry.get('content', ''))

            title = entry.get('name')
            if title and len(title) > 512:
                # FIXME is there a db setting to do this automatically?
                title = title[:512]

            # fall back to the source url when mf2 has no usable permalink
            permalink = entry.get('url')
            if not permalink or not isinstance(permalink, str):
                permalink = url

            context.url = url
            context.permalink = permalink
            context.author_name = author.get('name', '')
            context.author_url = author.get('url', '')
            context.author_image = author.get('photo')
            context.content = content
            context.content_plain = util.format_as_text(
                content, link_fn=lambda a: a)
            context.published = entry.get('published')
            context.title = title

    return context
示例#15
0
def test_p_content():
    """p-content (instead of the usual e-content) must not raise."""
    author_card = {
        "type": ["h-card"],
        "properties": {"name": ["Kyle"], "url": ["https://kylewm.com"]},
        "value": "Kyle",
    }
    entry = {
        "type": ["h-entry"],
        "properties": {
            "author": [author_card],
            "content": ["Thanks for hosting!"],
            "in-reply-to": ["https://snarfed.org/2014-06-16_homebrew-website-club-at-quip"],
            "name": ["I'm attending\n Homebrew Website Club at Quip\n Thanks for hosting!\n Kyle"],
            "rsvp": ["yes"],
        },
    }
    parsed = {"items": [entry], "rel-urls": {}, "rels": {}}
    result = mf2util.interpret(parsed, 'http://kylewm.com/test/rsvp.html')
    assert result.get('content') == 'Thanks for hosting!'
示例#16
0
def test_event():
    """Interpret an h-event (HWC event from werd.io), including its location."""
    parsed = load_test('hwc-event')
    result = mf2util.interpret(parsed, 'http://werd.io/2014/homebrew-website-club-4')

    assert result['type'] == 'event'
    assert result['name'] == 'Homebrew Website Club'
    assert 'Are you building your own website?' in result['content']
    start, end = result['start'], result['end']
    assert start.replace(tzinfo=None) == datetime(2014, 5, 7, 18, 30)
    assert start.utcoffset() == timedelta(hours=0)
    assert end.replace(tzinfo=None) == datetime(2014, 5, 7, 19, 30)
    assert end.utcoffset() == timedelta(hours=0)
    expected_location = {
        'name': 'Mozilla SF, 1st floor, 2 Harrison st. (at Embarcadero), San Francisco, CA ',
    }
    assert result['location'] == expected_location
示例#17
0
def extract_mf2_context(context, doc, url):
    """Fill in the given context object from mf2 markup found in doc."""
    seen = {}

    # cache fetched pages for mf2util's authorship algorithm
    def fetch_mf2(target):
        cached = seen.get(target)
        if cached is not None:
            return cached
        fetched = mf2py.parse(url=target)
        seen[target] = fetched
        return fetched

    parsed_doc = mf2py.parse(doc=doc, url=url)
    seen[url] = parsed_doc

    if not parsed_doc:
        return context

    current_app.logger.debug('parsed successfully by mf2py: %s', url)
    entry = mf2util.interpret(parsed_doc, url, fetch_mf2_func=fetch_mf2)
    if not entry:
        return context

    current_app.logger.debug('parsed successfully by mf2util: %s', url)
    cleaned = util.clean_foreign_html(entry.get('content', ''))

    title = entry.get('name')
    if title and len(title) > 512:
        # FIXME is there a db setting to do this automatically?
        title = title[:512]

    permalink = entry.get('url')
    if not permalink or not isinstance(permalink, str):
        permalink = url

    context.url = url
    context.permalink = permalink
    context.author_name = entry.get('author', {}).get('name', '')
    context.author_url = entry.get('author', {}).get('url', '')
    context.author_image = entry.get('author', {}).get('photo')
    context.content = cleaned
    context.content_plain = util.format_as_text(cleaned, link_fn=lambda a: a)
    context.published = entry.get('published')
    context.title = title

    return context
示例#18
0
def convert_mf2util():
    def dates_to_string(json):
        if isinstance(json, dict):
            return {k: dates_to_string(v) for (k, v) in json.items()}
        if isinstance(json, list):
            return [dates_to_string(v) for v in json]
        if isinstance(json, datetime.date) or isinstance(json, datetime.datetime):
            return json.isoformat()
        return json

    url = request.args.get('url')
    if url:
        d = mf2py.Parser(url=url).to_dict()
        if mf2util.find_first_entry(d, ['h-feed']):
            json = mf2util.interpret_feed(d, url)
        else:
            json = mf2util.interpret(d, url)
        return jsonify(dates_to_string(json))
    return """
示例#19
0
文件: tasks.py 项目: Lancey6/woodwind
def fetch_reply_context(entry_id, in_reply_to, now):
    """Find or fetch the reply-context entry for in_reply_to and attach it."""
    with flask_app():
        entry = Entry.query.get(entry_id)
        # prefer a context entry we already have in the database
        context = (Entry.query
                   .join(Entry.feed)
                   .filter(Entry.permalink == in_reply_to,
                           Feed.type == 'html')
                   .first())

        if not context:
            current_app.logger.info('fetching in-reply-to url: %s',
                                    in_reply_to)
            interpreted = mf2util.interpret(
                mf2py.parse(url=proxy_url(in_reply_to)), in_reply_to)
            if interpreted:
                context = hentry_to_entry(interpreted, in_reply_to, False, now)

        if context:
            entry.reply_context.append(context)
            db.session.commit()
示例#20
0
def fetch_context():
    """HTTP endpoint: fetch ?url= and return its interpreted mf2 entry as JSON."""
    url = request.args.get('url')
    if not url:
        body = {
            'error': 'missing_url',
            'message': "Missing 'url' query parameter",
        }
        return make_response(jsonify(body), 400)

    # TODO cache everything. check newer urls more frequently than
    # older urls. be careful not to overwrite previous good responses
    # with failure.

    url = maybe_proxy(url)
    resp = fetch(url)

    if resp.status_code // 100 != 2:
        body = {
            'error': 'fetch_failed',
            'message': 'Failed to fetch resource at ' + url,
            'response': resp.text,
            'code': resp.status_code,
        }
        return make_response(jsonify(body), resp.status_code)

    # fall back to raw bytes when the response declares no content-type
    doc = resp.text if 'content-type' in resp.headers else resp.content
    parsed = mf2py.parse(doc=doc, url=url)
    entry = mf2util.interpret(parsed, url, want_json=True)

    blob = {'data': entry} if entry else {}

    cb = request.args.get('callback')
    if cb:  # jsonp
        out = make_response('{}({})'.format(cb, json.dumps(blob)))
        out.headers['content-type'] = 'application/javascript; charset=utf-8'
        return out

    return jsonify(blob)
示例#21
0
def fetch_context():
    """Interpret the microformats2 entry at the requested URL as JSON/JSONP."""
    target = request.args.get('url')
    if not target:
        return make_response(jsonify({
            'error': 'missing_url',
            'message': "Missing 'url' query parameter",
        }), 400)

    # TODO cache everything. check newer urls more frequently than
    # older urls. be careful not to overwrite previous good responses
    # with failure.

    target = maybe_proxy(target)
    resp = fetch(target)

    if resp.status_code // 100 != 2:
        return make_response(jsonify({
            'error': 'fetch_failed',
            'message': 'Failed to fetch resource at ' + target,
            'response': resp.text,
            'code': resp.status_code,
        }), resp.status_code)

    if 'content-type' in resp.headers:
        document = resp.text
    else:
        document = resp.content
    parsed = mf2py.parse(doc=document, url=target)
    entry = mf2util.interpret(parsed, target, want_json=True)

    blob = {}
    if entry:
        blob['data'] = entry

    callback = request.args.get('callback')
    if callback:  # jsonp
        resp = make_response('{}({})'.format(callback, json.dumps(blob)))
        resp.headers['content-type'] = 'application/javascript; charset=utf-8'
        return resp

    return jsonify(blob)
示例#22
0
def test_comment_and_like():
    """Interpret a note that carries both a comment and a like."""
    result = mf2util.interpret(
        load_test('note_with_comment_and_like'),
        'https://kylewm.com/2015/10/big-thing-missing-from-my-indieweb-experience-is')
    assert result['type'] == 'entry'

    comments = result['comment']
    assert len(comments) == 1
    comment = comments[0]
    assert comment['type'] == 'cite'
    assert comment['author'] == {
        'name': 'Aaron Parecki',
        'photo': 'https://twitter.com/aaronpk/profile_image?size=original',
        'url': 'http://aaronparecki.com',
    }
    assert comment['content'] == '<a href="https://twitter.com/kylewmahan">@kylewmahan</a> I usually click through a couple levels up looking to see if any of the URLs up the chain show comments <a href="https://twitter.com/search?q=%23indieweb">#indieweb</a>'

    likes = result['like']
    assert len(likes) == 1
    assert likes[0]['type'] == 'cite'
    assert likes[0]['author'] == {
        'name': '',
        'url': 'https://twitter.com/benwerd',
        'photo': 'https://kylewm.com/imageproxy?url=https%3A%2F%2Ftwitter.com%2Fbenwerd%2Fprofile_image%3Fsize%3Doriginal&size=48&sig=fde7ce5635f5ea132a2545ff5c7d3d33',
    }
示例#23
0
def convert_mf2util():
    def dates_to_string(json):
        if isinstance(json, dict):
            return {k: dates_to_string(v) for (k, v) in json.items()}
        if isinstance(json, list):
            return [dates_to_string(v) for v in json]
        if isinstance(json, datetime.date) or isinstance(json, datetime.datetime):
            return json.isoformat()
        return json

    url = request.args.get('url')
    as_feed = request.args.get('as-feed')
    if url:
        try:
            d = mf2py.parse(url=url)
            if as_feed == 'true' or mf2util.find_first_entry(d, ['h-feed']):
                json = mf2util.interpret_feed(d, url)
            else:
                json = mf2util.interpret(d, url)
            return jsonify(dates_to_string(json))
        except:
            return jsonify({'error': str(sys.exc_info()[0])})

    return """
示例#24
0
def test_no_p_name():
    """An article with no explicit p-name should have no 'name' key at all."""
    result = mf2util.interpret(load_test('article_no_p-name'), 'http://example.com')
    assert 'Give me crayons and I will draw a rocketship.' in result['content']
    assert 'name' not in result
示例#25
0
def test_convert_relative_paths():
    """Relative URLs in content should be resolved against base_href."""
    doc = load_test('relative_paths')
    result = mf2util.interpret(doc, 'http://example.com/blog/', base_href='../')
    expected = 'This is an <img alt="alt text" title="the title" src="http://example.com/static/img.jpg"/> example document with <a href="http://example.com/relative_paths.html">relative paths</a>.'
    assert result['content'] == expected
示例#26
0
def test_convert_relative_paths():
    """Relative URLs in content should be made absolute against the source URL."""
    result = mf2util.interpret(load_test('relative_paths'), 'http://example.com')
    assert result['content'] == (
        'This is an <img alt="alt text" title="the title" '
        'src="http://example.com/static/img.jpg"/> example document with '
        '<a href="http://example.com/relative_paths.html">relative paths</a>.')
示例#27
0
def test_no_p_name():
    """Entries without a p-name must not be given a fabricated 'name'."""
    parsed = load_test('article_no_p-name')
    interpreted = mf2util.interpret(parsed, 'http://example.com')
    assert 'Give me crayons and I will draw a rocketship.' in interpreted['content']
    assert 'name' not in interpreted
示例#28
0
def test_unusual_properties():
    """Less common properties (url, uid) should be picked up by interpret."""
    interpreted = mf2util.interpret(
        load_test('unusual_properties'), 'https://example.com/')
    assert interpreted.get('name') == 'Rocky Raccoon'
    assert interpreted.get('url') == 'https://foo.bar/'
    assert interpreted.get('uid') == 'https://foo.bar/'
示例#29
0
import mf2py
import mf2util
import pprint

# Debugging script: compare mf2util's comment-specific interpretation with
# its general interpretation for a Bridgy-proxied Twitter reply.
source_url = r'https://brid.gy/comment/twitter/desmondrivet/1117876830478852096/1118148721034891264'
target_url = r'https://desmondrivet.com/2019/04/15/20190415154611'

# fetch and parse the source page (network request happens here)
parsed = mf2py.Parser(url=source_url).to_dict()
# interpret as a comment aimed at target_url, and generically for comparison
comment = mf2util.interpret_comment(parsed, source_url, [target_url])
general = mf2util.interpret(parsed, source_url)
# NOTE(review): `general` is computed but never printed below — intentional?

pprint.pprint(parsed)
print('-----\n')
pprint.pprint(comment)
示例#30
0
def json_to_object(mf2, actor=None):
    """Converts microformats2 JSON to an ActivityStreams object.

  Args:
    mf2: dict, decoded JSON microformats2 object
    actor: optional author AS actor object. usually comes from a rel="author"
      link. if mf2 has its own author, that will override this.

  Returns:
    dict, ActivityStreams object
  """
    # empty or non-dict input: nothing to convert
    if not mf2 or not isinstance(mf2, dict):
        return {}

    # shallow copy so setdefault below doesn't add a key to the caller's dict
    # (nested values, including 'properties', are still shared with the input)
    mf2 = copy.copy(mf2)
    props = mf2.setdefault('properties', {})
    prop = first_props(props)
    rsvp = prop.get('rsvp')
    rsvp_verb = 'rsvp-%s' % rsvp if rsvp else None
    # an inline mf2 author overrides the actor argument
    author = json_to_object(prop['author']) if prop.get('author') else actor

    # maps mf2 type to ActivityStreams objectType and optional verb.
    mf2_type_to_as_type = {
        'rsvp': ('activity', rsvp_verb),
        'invite': ('activity', 'invite'),
        'repost': ('activity', 'share'),
        'like': ('activity', 'like'),
        'reply': ('comment', None),
        'person': ('person', None),
        'location': ('place', None),
        'note': ('note', None),
        'article': ('article', None),
    }

    mf2_types = mf2.get('type') or []
    if 'h-geo' in mf2_types or 'p-location' in mf2_types:
        mf2_type = 'location'
    else:
        # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
        # *is* a photo. so, special case photo type to fall through to underlying
        # mf2 type without photo.
        # https://github.com/snarfed/bridgy/issues/702
        without_photo = copy.deepcopy(mf2)
        without_photo.get('properties', {}).pop('photo', None)
        mf2_type = mf2util.post_type_discovery(without_photo)

    as_type, as_verb = mf2_type_to_as_type.get(mf2_type, (None, None))

    # wrap each absolute URL of the given property in a {'url': ...} dict
    def absolute_urls(prop):
        return [
            {
                'url': url
            } for url in get_string_urls(props.get(prop, []))
            # filter out relative and invalid URLs (mf2py gives absolute urls)
            # NOTE(review): `urlparse.urlparse` (and `basestring` below) are
            # Python 2 names — presumably provided by compat imports; confirm.
            if urlparse.urlparse(url).netloc
        ]

    urls = props.get('url') and get_string_urls(props.get('url'))

    obj = {
        'id':
        prop.get('uid'),
        'objectType':
        as_type,
        'verb':
        as_verb,
        'published':
        prop.get('published', ''),
        'updated':
        prop.get('updated', ''),
        'displayName':
        get_text(prop.get('name')),
        'summary':
        get_text(prop.get('summary')),
        'content':
        get_html(prop.get('content')),
        'url':
        urls[0] if urls else None,
        'urls': [{
            'value': u
        } for u in urls] if urls and len(urls) > 1 else None,
        'image':
        absolute_urls('photo'),
        'stream':
        absolute_urls('video'),
        'location':
        json_to_object(prop.get('location')),
        'replies': {
            'items': [json_to_object(c) for c in props.get('comment', [])]
        },
        'tags': [{
            'objectType': 'hashtag',
            'displayName': cat
        } if isinstance(cat, basestring) else json_to_object(cat)
                 for cat in props.get('category', [])],
    }

    # mf2util uses the indieweb/mf2 location algorithm to collect location properties.
    interpreted = mf2util.interpret({'items': [mf2]}, None)
    if interpreted:
        loc = interpreted.get('location')
        if loc:
            obj['location']['objectType'] = 'place'
            lat, lng = loc.get('latitude'), loc.get('longitude')
            if lat and lng:
                try:
                    obj['location']['latitude'] = float(lat)
                    obj['location']['longitude'] = float(lng)
                    # TODO fill in 'position', maybe using Source.postprocess_object?
                except ValueError:
                    logging.warn(
                        'Could not convert latitude/longitude (%s, %s) to decimal',
                        lat, lng)

    if as_type == 'activity':
        # activities carry their target(s) in 'object' and the author as 'actor'
        objects = []
        for target in itertools.chain.from_iterable(
                props.get(field, [])
                for field in ('like', 'like-of', 'repost', 'repost-of',
                              'in-reply-to', 'invitee')):
            t = json_to_object(target) if isinstance(target, dict) else {
                'url': target
            }
            # eliminate duplicates from redundant backcompat properties
            if t not in objects:
                objects.append(t)
        obj.update({
            'object': objects[0] if len(objects) == 1 else objects,
            'actor': author,
        })
    else:
        # non-activities keep the author in 'author' and reply targets inline
        obj.update({
            'inReplyTo': [{
                'url': url
            } for url in get_string_urls(props.get('in-reply-to', []))],
            'author':
            author,
        })

    return util.trim_nulls(obj)
示例#31
0
def json_to_object(mf2, actor=None, fetch_mf2=False):
    """Converts microformats2 JSON to an ActivityStreams object.

  Args:
    mf2: dict, decoded JSON microformats2 object
    actor: optional author AS actor object. usually comes from a rel="author"
      link. if mf2 has its own author, that will override this.
    fetch_mf2: boolean, whether to fetch additional pages via HTTP if necessary,
      e.g. to determine authorship: https://indieweb.org/authorship

  Returns:
    dict, ActivityStreams object
  """
    # empty or non-dict input: nothing to convert
    if not mf2 or not isinstance(mf2, dict):
        return {}

    # shallow copy so setdefault below doesn't add a key to the caller's dict
    # (nested values, including 'properties', are still shared with the input)
    mf2 = copy.copy(mf2)
    props = mf2.setdefault('properties', {})
    prop = first_props(props)
    rsvp = prop.get('rsvp')

    # convert author
    mf2_author = prop.get('author')
    if mf2_author and isinstance(mf2_author, dict):
        author = json_to_object(mf2_author)
    else:
        # the author h-card may be on another page. run full authorship algorithm:
        # https://indieweb.org/authorship
        def fetch(url):
            return mf2py.parse(util.requests_get(url).text, url=url)

        author = mf2util.find_author(
            {'items': [mf2]},
            hentry=mf2,
            # only fetch over the network when the caller opted in
            fetch_mf2_func=fetch if fetch_mf2 else None)
        if author:
            author = {
                'objectType': 'person',
                'url': author.get('url'),
                'displayName': author.get('name'),
                'image': [{
                    'url': author.get('photo')
                }],
            }

    if not author:
        author = actor

    mf2_types = mf2.get('type') or []
    if 'h-geo' in mf2_types or 'p-location' in mf2_types:
        mf2_type = 'location'
    else:
        # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
        # *is* a photo. so, special case photo type to fall through to underlying
        # mf2 type without photo.
        # https://github.com/snarfed/bridgy/issues/702
        without_photo = copy.deepcopy(mf2)
        without_photo.get('properties', {}).pop('photo', None)
        mf2_type = mf2util.post_type_discovery(without_photo)

    as_type, as_verb = MF2_TO_AS_TYPE_VERB.get(mf2_type, (None, None))
    if rsvp:
        as_verb = 'rsvp-%s' % rsvp

    # special case GitHub issues that are in-reply-to the repo or its issues URL
    in_reply_tos = get_string_urls(props.get('in-reply-to', []))
    for url in in_reply_tos:
        if re.match(r'^https?://github.com/[^/]+/[^/]+(/issues)?/?$', url):
            as_type = 'issue'

    # collect the absolute URLs of the given property
    def absolute_urls(prop):
        return [
            url for url in get_string_urls(props.get(prop, []))
            # filter out relative and invalid URLs (mf2py gives absolute urls)
            if urllib.parse.urlparse(url).netloc
        ]

    urls = props.get('url') and get_string_urls(props.get('url'))

    # quotations: https://indieweb.org/quotation#How_to_markup
    attachments = [
        json_to_object(quote)
        for quote in mf2.get('children', []) + props.get('quotation-of', [])
        if isinstance(quote, dict) and 'h-cite' in set(quote.get('type', []))
    ]

    # audio and video
    for type in 'audio', 'video':
        attachments.extend({
            'objectType': type,
            'stream': {
                'url': url
            }
        } for url in get_string_urls(props.get(type, [])))

    obj = {
        'id':
        prop.get('uid'),
        'objectType':
        as_type,
        'verb':
        as_verb,
        'published':
        prop.get('published', ''),
        'updated':
        prop.get('updated', ''),
        'startTime':
        prop.get('start'),
        'endTime':
        prop.get('end'),
        'displayName':
        get_text(prop.get('name')),
        'summary':
        get_text(prop.get('summary')),
        'content':
        get_html(prop.get('content')),
        'url':
        urls[0] if urls else None,
        'urls': [{
            'value': u
        } for u in urls] if urls and len(urls) > 1 else None,
        'image': [{
            'url': url
        } for url in dedupe_urls(
            absolute_urls('photo') + absolute_urls('featured'))],
        'stream': [{
            'url': url
        } for url in absolute_urls('video')],
        'location':
        json_to_object(prop.get('location')),
        'replies': {
            'items': [json_to_object(c) for c in props.get('comment', [])]
        },
        # NOTE(review): `basestring` is a Python 2 name — presumably provided
        # by a compat import (e.g. past.builtins); confirm.
        'tags': [{
            'objectType': 'hashtag',
            'displayName': cat
        } if isinstance(cat, basestring) else json_to_object(cat)
                 for cat in props.get('category', [])],
        'attachments':
        attachments,
    }

    # mf2util uses the indieweb/mf2 location algorithm to collect location properties.
    interpreted = mf2util.interpret({'items': [mf2]}, None)
    if interpreted:
        loc = interpreted.get('location')
        if loc:
            obj['location']['objectType'] = 'place'
            lat, lng = loc.get('latitude'), loc.get('longitude')
            if lat and lng:
                try:
                    obj['location'].update({
                        'latitude': float(lat),
                        'longitude': float(lng),
                    })
                except ValueError:
                    logging.warn(
                        'Could not convert latitude/longitude (%s, %s) to decimal',
                        lat, lng)

    if as_type == 'activity':
        # activities carry their target(s) in 'object' and the author as 'actor'
        objects = []
        for target in itertools.chain.from_iterable(
                props.get(field, [])
                for field in ('like', 'like-of', 'repost', 'repost-of',
                              'in-reply-to', 'invitee')):
            t = json_to_object(target) if isinstance(target, dict) else {
                'url': target
            }
            # eliminate duplicates from redundant backcompat properties
            if t not in objects:
                objects.append(t)
        obj.update({
            'object': objects[0] if len(objects) == 1 else objects,
            'actor': author,
        })
    else:
        # non-activities keep the author in 'author' and reply targets inline
        obj.update({
            'inReplyTo': [{
                'url': url
            } for url in in_reply_tos],
            'author': author,
        })

    return source.Source.postprocess_object(obj)
示例#32
0
def json_to_object(mf2, actor=None, fetch_mf2=False):
  """Converts a single microformats2 JSON item to an ActivityStreams object.

  Supports h-entry, h-event, h-card, and other single item times. Does *not* yet
  support h-feed.

  Args:
    mf2: dict, decoded JSON microformats2 object
    actor: optional author AS actor object. usually comes from a rel="author"
      link. if mf2 has its own author, that will override this.
    fetch_mf2: boolean, whether to fetch additional pages via HTTP if necessary,
      e.g. to determine authorship: https://indieweb.org/authorship

  Returns:
    dict, ActivityStreams object
  """
  if not mf2 or not isinstance(mf2, dict):
    return {}

  # shallow copy so the setdefault below doesn't mutate the caller's dict.
  # nested values are still shared with the caller.
  mf2 = copy.copy(mf2)
  props = mf2.setdefault('properties', {})
  # first_props presumably flattens each mf2 property to its first value --
  # confirm against its definition
  prop = first_props(props)
  rsvp = prop.get('rsvp')

  # convert author
  mf2_author = prop.get('author')
  if mf2_author and isinstance(mf2_author, dict):
    # embedded h-card: convert it directly, recursively
    author = json_to_object(mf2_author)
  else:
    # the author h-card may be on another page. run full authorship algorithm:
    # https://indieweb.org/authorship
    author = mf2util.find_author({'items': [mf2]}, hentry=mf2,
                                 fetch_mf2_func=util.fetch_mf2 if fetch_mf2 else None)
    if author:
      author = {
        'objectType': 'person',
        'url': author.get('url'),
        'displayName': author.get('name'),
        'image': [{'url': author.get('photo')}],
      }

  # fall back to the actor passed in (e.g. from rel="author")
  if not author:
    author = actor

  # determine the mf2 post type, with special cases that mf2util doesn't
  # (yet) handle itself
  mf2_types = mf2.get('type') or []
  if 'h-geo' in mf2_types or 'p-location' in mf2_types:
    mf2_type = 'location'
  elif 'tag-of' in props:
    # TODO: remove once this is in mf2util
    # https://github.com/kylewm/mf2util/issues/18
    mf2_type = 'tag'
  elif 'follow-of' in props: # ditto
    mf2_type = 'follow'
  else:
    # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
    # *is* a photo. so, special case photo type to fall through to underlying
    # mf2 type without photo.
    # https://github.com/snarfed/bridgy/issues/702
    without_photo = copy.deepcopy(mf2)
    without_photo.get('properties', {}).pop('photo', None)
    mf2_type = mf2util.post_type_discovery(without_photo)

  # map mf2 post type to AS objectType and (optional) verb
  as_type, as_verb = MF2_TO_AS_TYPE_VERB.get(mf2_type, (None, None))
  if rsvp:
    as_verb = 'rsvp-%s' % rsvp

  # special case GitHub issues that are in-reply-to the repo or its issues URL
  in_reply_tos = get_string_urls(props.get('in-reply-to', []))
  for url in in_reply_tos:
    if re.match(r'^https?://github.com/[^/]+/[^/]+(/issues)?/?$', url):
      as_type = 'issue'

  def is_absolute(url):
    """Filter out relative and invalid URLs (mf2py gives absolute urls)."""
    return urllib.parse.urlparse(url).netloc

  urls = props.get('url') and get_string_urls(props.get('url'))

  # quotations: https://indieweb.org/quotation#How_to_markup
  attachments = [
    json_to_object(quote)
    for quote in mf2.get('children', []) + props.get('quotation-of', [])
    if isinstance(quote, dict) and 'h-cite' in set(quote.get('type', []))]

  # audio and video
  #
  # the duration mf2 property is still emerging. examples in the wild use both
  # integer seconds and ISO 8601 durations.
  # https://indieweb.org/duration
  # https://en.wikipedia.org/wiki/ISO_8601#Durations
  duration = prop.get('duration') or prop.get('length')
  if duration:
    if util.is_int(duration):
      duration = int(duration)
    else:
      parsed = util.parse_iso8601_duration(duration)
      if parsed:
        duration = int(parsed.total_seconds())
      else:
        logging.debug('Unknown format for length or duration %r', duration)
        duration = None


  # build audio/video attachments; `stream` ends up as the last type's first
  # attachment's stream (video wins over audio when both are present).
  # NOTE(review): `bytes` and the loop variable `type` shadow builtins here.
  stream = None
  bytes = size_to_bytes(prop.get('size'))
  for type in 'audio', 'video':
    atts = [{
      'objectType': type,
      'stream': {
        'url': url,
        # integer seconds: http://activitystrea.ms/specs/json/1.0/#media-link
        'duration': duration,
        # file size in bytes. nonstandard, not in AS1 or AS2
        'size': bytes,
      },
    } for url in get_string_urls(props.get(type, []))]
    attachments.extend(atts)
    if atts:
      stream = atts[0]['stream']

  obj = {
    'id': prop.get('uid'),
    'objectType': as_type,
    'verb': as_verb,
    'published': prop.get('published', ''),
    'updated': prop.get('updated', ''),
    'startTime': prop.get('start'),
    'endTime': prop.get('end'),
    'displayName': get_text(prop.get('name')),
    'summary': get_text(prop.get('summary')),
    'content': get_html(prop.get('content')),
    'url': urls[0] if urls else None,
    'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
    # image is special cased below, to handle alt
    'stream': [stream],
    'location': json_to_object(prop.get('location')),
    'replies': {'items': [json_to_object(c) for c in props.get('comment', [])]},
    'tags': [{'objectType': 'hashtag', 'displayName': cat}
             if isinstance(cat, str)
             else json_to_object(cat)
             for cat in props.get('category', [])],
    'attachments': attachments,
  }

  # images, including alt text. deduplicates by URL and drops relative URLs.
  photo_urls = set()
  obj['image'] = []
  for photo in props.get('photo', []) + props.get('featured', []):
    url = photo
    alt = None
    if isinstance(photo, dict):
      photo = photo.get('properties') or photo
      url = get_first(photo, 'value') or get_first(photo, 'url')
      alt = get_first(photo, 'alt')
    if url and url not in photo_urls and is_absolute(url):
      photo_urls.add(url)
      obj['image'].append({'url': url, 'displayName': alt})

  # mf2util uses the indieweb/mf2 location algorithm to collect location properties.
  interpreted = mf2util.interpret({'items': [mf2]}, None)
  if interpreted:
    loc = interpreted.get('location')
    if loc:
      obj['location']['objectType'] = 'place'
      lat, lng = loc.get('latitude'), loc.get('longitude')
      if lat and lng:
        try:
          obj['location'].update({
            'latitude': float(lat),
            'longitude': float(lng),
          })
        except ValueError:
          logging.debug(
            'Could not convert latitude/longitude (%s, %s) to decimal', lat, lng)

  if as_type == 'activity':
    # activities carry their target object(s) and the author as the actor
    objects = []
    for target in itertools.chain.from_iterable(
        props.get(field, []) for field in (
          'follow-of', 'like', 'like-of', 'repost', 'repost-of', 'in-reply-to',
          'invitee')):
      t = json_to_object(target) if isinstance(target, dict) else {'url': target}
      # eliminate duplicates from redundant backcompat properties
      if t not in objects:
        objects.append(t)
    obj.update({
      'object': objects[0] if len(objects) == 1 else objects,
      'actor': author,
    })
    if as_verb == 'tag':
      # tag-of: the tagged post is the target, the tags become the object
      obj['target'] = {'url': prop['tag-of']}
      if obj.get('object'):
        raise NotImplementedError(
          'Combined in-reply-to and tag-of is not yet supported.')
      obj['object'] = obj.pop('tags')
  else:
    obj.update({
      'inReplyTo': [{'url': url} for url in in_reply_tos],
      'author': author,
    })

  return source.Source.postprocess_object(obj)
示例#33
0
def json_to_object(mf2, actor=None, fetch_mf2=False):
  """Converts a single microformats2 JSON item to an ActivityStreams object.

  Supports h-entry, h-event, h-card, and other single item times. Does *not* yet
  support h-feed.

  Args:
    mf2: dict, decoded JSON microformats2 object
    actor: optional author AS actor object. usually comes from a rel="author"
      link. if mf2 has its own author, that will override this.
    fetch_mf2: boolean, whether to fetch additional pages via HTTP if necessary,
      e.g. to determine authorship: https://indieweb.org/authorship

  Returns:
    dict, ActivityStreams object
  """
  if not mf2 or not isinstance(mf2, dict):
    return {}

  # shallow copy so the setdefault below doesn't mutate the caller's dict
  mf2 = copy.copy(mf2)
  props = mf2.setdefault('properties', {})
  prop = first_props(props)
  rsvp = prop.get('rsvp')

  # convert author
  mf2_author = prop.get('author')
  if mf2_author and isinstance(mf2_author, dict):
    # embedded h-card: convert it directly, recursively
    author = json_to_object(mf2_author)
  else:
    # the author h-card may be on another page. run full authorship algorithm:
    # https://indieweb.org/authorship
    def fetch(url):
      return mf2py.parse(util.requests_get(url).text, url=url, img_with_alt=True)
    author = mf2util.find_author(
      {'items': [mf2]}, hentry=mf2, fetch_mf2_func=fetch if fetch_mf2 else None)
    if author:
      author = {
        'objectType': 'person',
        'url': author.get('url'),
        'displayName': author.get('name'),
        'image': [{'url': author.get('photo')}],
      }

  # fall back to the actor passed in (e.g. from rel="author")
  if not author:
    author = actor

  # determine the mf2 post type, with special cases that mf2util doesn't
  # (yet) handle itself
  mf2_types = mf2.get('type') or []
  if 'h-geo' in mf2_types or 'p-location' in mf2_types:
    mf2_type = 'location'
  elif 'tag-of' in props:
    # TODO: remove once this is in mf2util
    # https://github.com/kylewm/mf2util/issues/18
    mf2_type = 'tag'
  elif 'follow-of' in props: # ditto
    mf2_type = 'follow'
  else:
    # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
    # *is* a photo. so, special case photo type to fall through to underlying
    # mf2 type without photo.
    # https://github.com/snarfed/bridgy/issues/702
    without_photo = copy.deepcopy(mf2)
    without_photo.get('properties', {}).pop('photo', None)
    mf2_type = mf2util.post_type_discovery(without_photo)

  # map mf2 post type to AS objectType and (optional) verb
  as_type, as_verb = MF2_TO_AS_TYPE_VERB.get(mf2_type, (None, None))
  if rsvp:
    as_verb = 'rsvp-%s' % rsvp

  # special case GitHub issues that are in-reply-to the repo or its issues URL
  in_reply_tos = get_string_urls(props.get('in-reply-to', []))
  for url in in_reply_tos:
    if re.match(r'^https?://github.com/[^/]+/[^/]+(/issues)?/?$', url):
      as_type = 'issue'

  def is_absolute(url):
    """Filter out relative and invalid URLs (mf2py gives absolute urls)."""
    return urllib.parse.urlparse(url).netloc

  urls = props.get('url') and get_string_urls(props.get('url'))

  # quotations: https://indieweb.org/quotation#How_to_markup
  attachments = [
    json_to_object(quote)
    for quote in mf2.get('children', []) + props.get('quotation-of', [])
    if isinstance(quote, dict) and 'h-cite' in set(quote.get('type', []))]

  # audio and video
  # (renamed loop variable from `type` to avoid shadowing the builtin)
  for media_type in 'audio', 'video':
    attachments.extend({'objectType': media_type, 'stream': {'url': url}}
                       for url in get_string_urls(props.get(media_type, [])))

  obj = {
    'id': prop.get('uid'),
    'objectType': as_type,
    'verb': as_verb,
    'published': prop.get('published', ''),
    'updated': prop.get('updated', ''),
    'startTime': prop.get('start'),
    'endTime': prop.get('end'),
    'displayName': get_text(prop.get('name')),
    'summary': get_text(prop.get('summary')),
    'content': get_html(prop.get('content')),
    'url': urls[0] if urls else None,
    'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
    # image is special cased below, to handle alt
    'stream': [{'url': url} for url in get_string_urls(props.get('video', []))],
    'location': json_to_object(prop.get('location')),
    'replies': {'items': [json_to_object(c) for c in props.get('comment', [])]},
    # bug fix: `basestring` is Python 2 only and raised NameError here, since
    # this module already requires Python 3 (it uses urllib.parse above)
    'tags': [{'objectType': 'hashtag', 'displayName': cat}
             if isinstance(cat, str)
             else json_to_object(cat)
             for cat in props.get('category', [])],
    'attachments': attachments,
  }

  # images, including alt text. deduplicates by URL and drops relative URLs.
  photo_urls = set()
  obj['image'] = []
  for photo in props.get('photo', []) + props.get('featured', []):
    url = photo
    alt = None
    if isinstance(photo, dict):
      photo = photo.get('properties') or photo
      url = get_first(photo, 'value') or get_first(photo, 'url')
      alt = get_first(photo, 'alt')
    if url and url not in photo_urls and is_absolute(url):
      photo_urls.add(url)
      obj['image'].append({'url': url, 'displayName': alt})

  # mf2util uses the indieweb/mf2 location algorithm to collect location properties.
  interpreted = mf2util.interpret({'items': [mf2]}, None)
  if interpreted:
    loc = interpreted.get('location')
    if loc:
      obj['location']['objectType'] = 'place'
      lat, lng = loc.get('latitude'), loc.get('longitude')
      if lat and lng:
        try:
          obj['location'].update({
            'latitude': float(lat),
            'longitude': float(lng),
          })
        except ValueError:
          # logging.warn is a deprecated alias; use logging.warning
          logging.warning(
            'Could not convert latitude/longitude (%s, %s) to decimal', lat, lng)

  if as_type == 'activity':
    # activities carry their target object(s) and the author as the actor
    objects = []
    for target in itertools.chain.from_iterable(
        props.get(field, []) for field in (
          'follow-of', 'like', 'like-of', 'repost', 'repost-of', 'in-reply-to',
          'invitee')):
      t = json_to_object(target) if isinstance(target, dict) else {'url': target}
      # eliminate duplicates from redundant backcompat properties
      if t not in objects:
        objects.append(t)
    obj.update({
      'object': objects[0] if len(objects) == 1 else objects,
      'actor': author,
    })
    if as_verb == 'tag':
      # tag-of: the tagged post is the target, the tags become the object
      obj['target'] = {'url': prop['tag-of']}
      if obj.get('object'):
        raise NotImplementedError(
          'Combined in-reply-to and tag-of is not yet supported.')
      obj['object'] = obj.pop('tags')
  else:
    obj.update({
      'inReplyTo': [{'url': url} for url in in_reply_tos],
      'author': author,
    })

  return source.Source.postprocess_object(obj)
示例#34
0
def create_dcontext(url):
    """Builds a reply-context DContext for the given URL.

    Generates an embed iframe preview for recognized YouTube/Instagram URLs,
    then tries to interpret the archived mf2 document for the URL to fill in
    author, content, title, and publication dates. Falls back to a mostly-empty
    DContext when no archive exists or interpretation fails.

    Args:
        url: string, the in-reply-to / repost-of URL

    Returns:
        DContext
    """
    repost_preview = None
    # youtube embeds
    m = YOUTUBE_RE.match(url)
    if m:
        repost_preview = (
            """<iframe width="560" height="315" """
            """src="//www.youtube.com/embed/{}" frameborder="0" """
            """allowfullscreen></iframe>"""
            .format(m.group(1)))

    # instagram embeds
    m = INSTAGRAM_RE.match(url)
    if m:
        repost_preview = (
            """<iframe src="//instagram.com/p/{}/embed/" """
            """width="400" height="500" frameborder="0" scrolling="no" """
            """allowtransparency="true"></iframe>"""
            .format(m.group(1)))

    blob = archiver.load_json_from_archive(url)
    if blob:
        try:
            entry = mf2util.interpret(blob, url)
            pub_date = entry.get('published')

            content = entry.get('content', '')
            content_plain = format_as_text(content)

            # short content is shown in full (sanitized); long content is
            # truncated with a link back to the original
            if len(content_plain) < 512:
                content = bleach.clean(autolink(content), strip=True)
            else:
                content = (
                    jinja2.filters.do_truncate(content_plain, 512) +
                    ' <a class="u-url" href="{}">continued</a>'.format(url))

            title = entry.get('name', 'a post')
            if len(title) > 256:
                title = jinja2.filters.do_truncate(title, 256)

            author_name = bleach.clean(entry.get('author', {}).get('name', ''))
            author_image = entry.get('author', {}).get('photo')
            if author_image:
                author_image = local_mirror_resource(author_image)

            return DContext(
                url=url,
                permalink=entry.get('url', url),
                author_name=author_name,
                author_url=entry.get('author', {}).get('url', ''),
                author_image=author_image or url_for(
                    'static', filename=AUTHOR_PLACEHOLDER),
                content=content,
                repost_preview=repost_preview,
                pub_date=pub_date,
                pub_date_iso=isotime_filter(pub_date),
                pub_date_human=human_time(pub_date),
                title=title,
                deleted=False,
            )
        except Exception:
            # bug fix: was a bare `except:`, which also swallowed SystemExit
            # and KeyboardInterrupt. best-effort: log and fall through to the
            # placeholder context below.
            app.logger.exception('error interpreting %s', url)

    return DContext(
        url=url,
        permalink=url,
        author_name=None,
        author_url=None,
        author_image=None,
        content=None,
        repost_preview=repost_preview,
        pub_date=None,
        pub_date_iso=None,
        pub_date_human=None,
        title='a post',
        deleted=False,
    )
示例#35
0
def json_to_object(mf2, actor=None):
  """Converts microformats2 JSON to an ActivityStreams object.

  Args:
    mf2: dict, decoded JSON microformats2 object
    actor: optional author AS actor object. usually comes from a rel="author"
      link. if mf2 has its own author, that will override this.

  Returns:
    dict, ActivityStreams object
  """
  # NOTE(review): this version targets Python 2 (`basestring`, the `urlparse`
  # module below); porting it would require a separate change.
  if not mf2 or not isinstance(mf2, dict):
    return {}

  # shallow copy so the setdefault below doesn't mutate the caller's dict
  mf2 = copy.copy(mf2)
  props = mf2.setdefault('properties', {})
  # first_props presumably flattens each mf2 property to its first value --
  # confirm against its definition
  prop = first_props(props)
  rsvp = prop.get('rsvp')
  rsvp_verb = 'rsvp-%s' % rsvp if rsvp else None
  # embedded author h-card overrides the actor passed in
  author = json_to_object(prop['author']) if prop.get('author') else actor

  # maps mf2 type to ActivityStreams objectType and optional verb.
  mf2_type_to_as_type = {
    'rsvp': ('activity', rsvp_verb),
    'invite': ('activity', 'invite'),
    'repost': ('activity', 'share'),
    'like': ('activity', 'like'),
    'reply': ('comment', None),
    'person': ('person', None),
    'location': ('place', None),
    'note': ('note', None),
    'article': ('article', None),
  }

  mf2_types = mf2.get('type') or []
  if 'h-geo' in mf2_types or 'p-location' in mf2_types:
    mf2_type = 'location'
  else:
    # mf2 'photo' type is a note or article *with* a photo, but AS 'photo' type
    # *is* a photo. so, special case photo type to fall through to underlying
    # mf2 type without photo.
    # https://github.com/snarfed/bridgy/issues/702
    without_photo = copy.deepcopy(mf2)
    without_photo.get('properties', {}).pop('photo', None)
    mf2_type = mf2util.post_type_discovery(without_photo)

  as_type, as_verb = mf2_type_to_as_type.get(mf2_type, (None, None))

  def absolute_urls(prop):
    # collect {'url': ...} dicts for a property, keeping only absolute URLs
    return [{'url': url} for url in get_string_urls(props.get(prop, []))
            # filter out relative and invalid URLs (mf2py gives absolute urls)
            if urlparse.urlparse(url).netloc]

  urls = props.get('url') and get_string_urls(props.get('url'))

  obj = {
    'id': prop.get('uid'),
    'objectType': as_type,
    'verb': as_verb,
    'published': prop.get('published', ''),
    'updated': prop.get('updated', ''),
    'displayName': get_text(prop.get('name')),
    'summary': get_text(prop.get('summary')),
    'content': get_html(prop.get('content')),
    'url': urls[0] if urls else None,
    'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
    'image': absolute_urls('photo'),
    'stream': absolute_urls('video'),
    'location': json_to_object(prop.get('location')),
    'replies': {'items': [json_to_object(c) for c in props.get('comment', [])]},
    'tags': [{'objectType': 'hashtag', 'displayName': cat}
             if isinstance(cat, basestring)
             else json_to_object(cat)
             for cat in props.get('category', [])],
  }

  # mf2util uses the indieweb/mf2 location algorithm to collect location properties.
  interpreted = mf2util.interpret({'items': [mf2]}, None)
  if interpreted:
    loc = interpreted.get('location')
    if loc:
      obj['location']['objectType'] = 'place'
      lat, lng = loc.get('latitude'), loc.get('longitude')
      if lat and lng:
        try:
          obj['location']['latitude'] = float(lat)
          obj['location']['longitude'] = float(lng)
          # TODO fill in 'position', maybe using Source.postprocess_object?
        except ValueError:
          logging.warn(
            'Could not convert latitude/longitude (%s, %s) to decimal', lat, lng)

  if as_type == 'activity':
    # activities carry their target object(s) and the author as the actor
    objects = []
    for target in itertools.chain.from_iterable(
        props.get(field, []) for field in (
          'like', 'like-of', 'repost', 'repost-of', 'in-reply-to', 'invitee')):
      t = json_to_object(target) if isinstance(target, dict) else {'url': target}
      # eliminate duplicates from redundant backcompat properties
      if t not in objects:
        objects.append(t)
    obj.update({
        'object': objects[0] if len(objects) == 1 else objects,
        'actor': author,
        })
  else:
    obj.update({
        'inReplyTo': [{'url': url} for url in get_string_urls(props.get('in-reply-to', []))],
        'author': author,
        })

  return util.trim_nulls(obj)
示例#36
0
def interpret_entry(
    parsed,
    source_url,
    base_href=None,
    hentry=None,
    use_rel_syndication=True,
    want_json=False,
    fetch_mf2_func=None,
):
    """
    Interpret a document containing an h-entry and return its properties
    as a dictionary, e.g.:

        {'type': 'entry',
         'url': permalink of the document (may be different than source_url),
         'published': datetime or date,
         'updated': datetime or date,
         'name': title of the entry,
         'content': body of entry (contains HTML),
         'author': {
          'name': author name,
          'url': author url,
          'photo': author photo
         },
         'syndication': [
           'syndication url',
           ...
         ],
         'in-reply-to': [...],
         'like-of': [...],
         'repost-of': [...]}

    :param dict parsed: the result of parsing a document containing mf2 markup
    :param str source_url: the URL of the parsed document, used by the
      authorship algorithm
    :param str base_href: (optional) the href value of the base tag
    :param dict hentry: (optional) the item in the above document
      representing the h-entry. if provided, we can avoid a redundant
      call to find_first_entry
    :param boolean use_rel_syndication: (optional, default True) Whether
      to include rel=syndication in the list of syndication sources. Sometimes
      useful to set this to False when parsing h-feeds that erroneously include
      rel=syndication on each entry.
    :param boolean want_json: (optional, default False) if true, the result
      will be pure json with datetimes as strings instead of python objects
    :param callable fetch_mf2_func: (optional) function to fetch mf2 parsed
      output for a given URL.
    :return: a dict with some or all of the described properties
    """
    # locate the h-entry ourselves when the caller didn't provide one
    if not hentry:
        hentry = util.find_first_entry(parsed, ["h-entry"])
        if not hentry:
            return {}

    result = _interpret_common_properties(
        parsed,
        source_url,
        base_href,
        hentry,
        use_rel_syndication,
        want_json,
        fetch_mf2_func,
    )

    # h-cite entries are citations of other posts rather than posts themselves
    result["type"] = "cite" if "h-cite" in hentry.get("type", []) else "entry"

    entry_props = hentry["properties"]

    # NOTE patch start
    # pass a few extra raw properties straight through to the result
    for extra_prop in ("category", "pubkey", "vote"):
        if extra_prop in entry_props:
            result[extra_prop] = entry_props[extra_prop]
    # NOTE patch end

    # only keep the name if it's a genuine title, not just repeated content
    title = util.get_plain_text(entry_props.get("name"))
    if title and util.is_name_a_title(title, result.get("content-plain")):
        result["name"] = title

    # collect referenced posts: embedded mf2 items are interpreted recursively,
    # plain URL strings become {'url': ...} stubs
    reference_props = (
        "in-reply-to",
        "like-of",
        "repost-of",
        "bookmark-of",
        "vote-on",
        "comment",
        "like",
        "repost",
    )  # NOTE added vote-on
    for ref_prop in reference_props:
        for value in entry_props.get(ref_prop, []):
            if isinstance(value, dict):
                interpreted = util.interpret(
                    parsed,
                    source_url,
                    base_href,
                    value,
                    use_rel_syndication=False,
                    want_json=want_json,
                    fetch_mf2_func=fetch_mf2_func,
                )
                result.setdefault(ref_prop, []).append(interpreted)
            else:
                result.setdefault(ref_prop, []).append({"url": value})

    return result