def _find_feed_items(feed_url, feed_doc):
  """Extracts feed items from a given URL and document.

  If the top-level h-* item is an h-feed, returns its children. Otherwise,
  returns the top-level items.

  Args:
    feed_url: string, the URL passed to the mf2py parser
    feed_doc: string or BeautifulSoup object, the document passed to the
      mf2py parser

  Returns:
    list of dicts, each one representing an mf2 h-* item
  """
  parsed = util.mf2py_parse(feed_doc, feed_url)

  feeditems = parsed['items']
  hfeeds = mf2util.find_all_entries(parsed, ('h-feed',))
  if hfeeds:
    feeditems = list(itertools.chain.from_iterable(
      hfeed.get('children', []) for hfeed in hfeeds))
  else:
    logging.debug('No h-feed found, falling back to top-level h-entries.')

  if len(feeditems) > MAX_FEED_ENTRIES:
    logging.info('%s has %s entries! Only processing the first %s.',
                 feed_url, len(feeditems), MAX_FEED_ENTRIES)
    feeditems = feeditems[:MAX_FEED_ENTRIES]

  return feeditems
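# A minimal sketch (not bridgy code; assumes the mf2py package, with made-up
# markup and URL) of the h-feed unwrapping that _find_feed_items performs,
# shown against mf2py directly rather than bridgy's util.mf2py_parse wrapper.
def _sketch_find_feed_items():
  import mf2py

  html = """
  <div class="h-feed">
    <article class="h-entry"><a class="u-url" href="/post/1">one</a></article>
    <article class="h-entry"><a class="u-url" href="/post/2">two</a></article>
  </div>
  """
  parsed = mf2py.parse(doc=html, url='http://example.com/')
  feed = parsed['items'][0]
  assert feed['type'] == ['h-feed']
  # mf2py nests the contained h-entries under 'children'; those children are
  # what _find_feed_items returns instead of the h-feed wrapper itself
  assert len(feed.get('children', [])) == 2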
def resolve_profile_url(url, resolve=True):
  """Resolves a profile URL to be added to a source.

  Args:
    url: string
    resolve: boolean, whether to make HTTP requests to follow redirects, etc.

  Returns:
    string, resolved URL, or None
  """
  final, _, ok = util.get_webmention_target(url, resolve=resolve)
  if not ok:
    return None

  final = final.lower()
  if util.schemeless(final).startswith(util.schemeless(url.lower())):
    # redirected to a deeper path. use the original higher level URL. #652
    final = url

  # If final has a path segment check if root has a matching rel=me.
  match = re.match(r'^(https?://[^/]+)/.+', final)
  if match and resolve:
    root = match.group(1)
    try:
      resp = util.requests_get(root)
      resp.raise_for_status()
      data = util.mf2py_parse(resp.text, root)
      me_urls = data.get('rels', {}).get('me', [])
      if final in me_urls:
        final = root
    except requests.RequestException:
      logging.warning("Couldn't fetch %s, preserving path in %s",
                      root, final, exc_info=True)

  return final
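# A minimal sketch (mf2py package; hypothetical URLs and markup) of the
# rel=me check in resolve_profile_url: if the site root links back to the
# profile path with rel=me, the root is considered equivalent and preferred.
def _sketch_rel_me_root():
  import mf2py

  root = 'https://example.com/'
  root_html = '<a rel="me" href="https://example.com/alice">alice</a>'
  rels = mf2py.parse(doc=root_html, url=root)['rels']
  assert 'https://example.com/alice' in rels.get('me', [])
  # so resolve_profile_url('https://example.com/alice') would collapse the
  # profile URL down to the root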
def _urls_and_domains(self, auth_entity, user_url):
  """Returns this user's valid (not webmention-blacklisted) URLs and domains.

  Converts the auth entity's user_json to an ActivityStreams actor and uses
  its 'urls' and 'url' fields. May be overridden by subclasses.

  Args:
    auth_entity: :class:`oauth_dropins.models.BaseAuth`
    user_url: string, optional URL passed in when authorizing

  Returns:
    ([string url, ...], [string domain, ...])
  """
  actor = self.gr_source.user_to_actor(json.loads(auth_entity.user_json))
  logging.debug('Converted to actor: %s', json.dumps(actor, indent=2))

  candidates = util.trim_nulls(util.uniquify(
    [user_url] + microformats2.object_urls(actor)))

  if len(candidates) > MAX_AUTHOR_URLS:
    logging.info('Too many profile links! Only resolving the first %s: %s',
                 MAX_AUTHOR_URLS, candidates)

  urls = []
  for i, url in enumerate(candidates):
    final, domain, ok = util.get_webmention_target(
      url, resolve=i < MAX_AUTHOR_URLS)
    if ok:
      final = final.lower()
      if util.schemeless(final).startswith(util.schemeless(url.lower())):
        # redirected to a deeper path. use the original higher level URL. #652
        final = url
      # If final has a path segment check if root has a matching rel=me.
      match = re.match(r'^(https?://[^/]+)/.+', final)
      if match and i < MAX_AUTHOR_URLS:
        root = match.group(1)
        try:
          resp = util.requests_get(root)
          resp.raise_for_status()
          data = util.mf2py_parse(resp.text, root)
          me_urls = data.get('rels', {}).get('me', [])
          if final in me_urls:
            final = root
        except requests.RequestException:
          # a failed fetch of the root shouldn't abort resolving the rest
          logging.warning("Couldn't fetch %s, preserving path in %s",
                          root, final, exc_info=True)
      urls.append(final)

  urls = util.dedupe_urls(urls)  # normalizes domains to lower case
  domains = [util.domain_from_link(url) for url in urls]
  return urls, domains
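# A minimal sketch of the "redirected to a deeper path" guard (#652) above.
# schemeless() here is a stand-in for util.schemeless, which may normalize
# more than just stripping the scheme; URLs are made up.
def _sketch_deeper_path_guard():
  def schemeless(url):
    return url.split('://', 1)[-1]

  original = 'http://example.com/'
  final = 'http://example.com/blog/'  # where following redirects landed
  # the redirect only descended within the same site, so the original,
  # higher-level URL is kept instead
  assert schemeless(final.lower()).startswith(schemeless(original.lower()))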
def test_user_page_publish_url_with_unicode_char(self):
  """Check that a published URL with a unicode char renders on user pages."""
  self.sources[0].features = ['publish']
  self.sources[0].put()

  url = u'https://ptt.com/ransomw…ocks-user-access/'
  Publish(parent=PublishedPage(id=url.encode('utf-8')).key,
          source=self.sources[0].key).put()

  user_url = self.sources[0].bridgy_path()
  resp = app.application.get_response(user_url)
  self.assertEquals(200, resp.status_int)

  parsed = util.mf2py_parse(resp.body, user_url)
  publish = parsed['items'][0]['children'][0]
def test_social_user_page_mf2(self):
  """Check the custom mf2 we render on social user pages."""
  self.sources[0].features = ['listen', 'publish']
  self.sources[0].put()
  for entity in self.responses + self.publishes + self.blogposts:
    entity.put()

  user_url = self.sources[0].bridgy_path()
  resp = app.application.get_response(user_url)
  self.assertEquals(200, resp.status_int)

  parsed = util.mf2py_parse(resp.body, user_url)
  hcard = parsed.get('items', [])[0]
  self.assertEquals(['h-card'], hcard['type'])
  self.assertEquals(['Fake User'], hcard['properties'].get('name'))
  self.assertEquals(['http://fa.ke/profile/url'],
                    hcard['properties'].get('url'))
  self.assertEquals(['enabled'],
                    hcard['properties'].get('bridgy-account-status'))
  self.assertEquals(['enabled'],
                    hcard['properties'].get('bridgy-listen-status'))
  self.assertEquals(['enabled'],
                    hcard['properties'].get('bridgy-publish-status'))

  expected_resps = self.responses[:10]
  for item, resp in zip(hcard['children'], expected_resps):
    self.assertIn('h-bridgy-response', item['type'])
    props = item['properties']
    self.assertEquals([resp.status], props['bridgy-status'])
    self.assertEquals([json.loads(resp.activities_json[0])['url']],
                      props['bridgy-original-source'])
    self.assertEquals(resp.unsent, props['bridgy-target'])

  publish = hcard['children'][len(expected_resps)]
  self.assertIn('h-bridgy-publish', publish['type'])
  props = publish['properties']
  self.assertEquals([self.publishes[0].key.parent().id()], props['url'])
  self.assertEquals([self.publishes[0].status], props['bridgy-status'])
def expand_target_urls(self, activity):
  """Expand the inReplyTo or object fields of an ActivityStreams object
  by fetching the original and looking for rel=syndication URLs.

  This method modifies the dict in place.

  Args:
    activity: an ActivityStreams dict of the activity being published
  """
  for field in ('inReplyTo', 'object'):
    # microformats2.json_to_object de-dupes, no need to do it here
    objs = activity.get(field)
    if not objs:
      continue

    if isinstance(objs, dict):
      objs = [objs]

    augmented = list(objs)
    for obj in objs:
      url = obj.get('url')
      if not url:
        continue

      # get_webmention_target weeds out silos and non-HTML targets
      # that we wouldn't want to download and parse
      url, _, ok = util.get_webmention_target(url)
      if not ok:
        continue

      # fetch_mf2 raises a fuss if it can't fetch a mf2 document;
      # easier to just grab this ourselves than add a bunch of
      # special cases to that method
      logging.debug('expand_target_urls fetching field=%s, url=%s', field, url)
      try:
        resp = util.requests_get(url)
        resp.raise_for_status()
        data = util.mf2py_parse(resp.text, url)
      except AssertionError:
        raise  # for unit tests
      except BaseException:
        # it's not a big deal if we can't fetch an in-reply-to url
        logging.warning('expand_target_urls could not fetch field=%s, url=%s',
                        field, url, exc_info=True)
        continue

      synd_urls = data.get('rels', {}).get('syndication', [])

      # look for syndication urls in the first h-entry
      queue = collections.deque(data.get('items', []))
      while queue:
        item = queue.popleft()
        item_types = set(item.get('type', []))
        if 'h-feed' in item_types and 'h-entry' not in item_types:
          queue.extend(item.get('children', []))
          continue

        # these can be urls or h-cites
        synd_urls += microformats2.get_string_urls(
          item.get('properties', {}).get('syndication', []))

      logging.debug('expand_target_urls found rel=syndication for url=%s: %r',
                    url, synd_urls)
      augmented += [{'url': u} for u in synd_urls]

    activity[field] = augmented
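# Standalone sketch of the queue walk in expand_target_urls: collect
# u-syndication URLs from h-entries, descending into h-feed children. The
# dict literal stands in for mf2py output, and plain string values stand in
# for what microformats2.get_string_urls extracts (real values may be h-cites).
def _sketch_syndication_walk():
  import collections

  parsed = {'items': [{
    'type': ['h-feed'],
    'children': [{
      'type': ['h-entry'],
      'properties': {'syndication': ['https://twitter.com/alice/status/1']},
    }],
  }]}

  synd_urls = []
  queue = collections.deque(parsed['items'])
  while queue:
    item = queue.popleft()
    types = set(item.get('type', []))
    if 'h-feed' in types and 'h-entry' not in types:
      queue.extend(item.get('children', []))
      continue
    synd_urls += item.get('properties', {}).get('syndication', [])

  assert synd_urls == ['https://twitter.com/alice/status/1']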
def fetch_mf2(self, url, require_mf2=True, raise_errors=False):
  """Fetches a URL and extracts its mf2 data.

  Side effects: sets :attr:`entity`\ .html on success, calls :attr:`error()`
  on errors.

  Args:
    url: string
    require_mf2: boolean, whether to return an error if no mf2 are found
    raise_errors: boolean, whether to let error exceptions propagate up or
      handle them

  Returns:
    (:class:`requests.Response`, mf2 data dict) on success, None on failure
  """
  try:
    fetched = util.requests_get(url)
    fetched.raise_for_status()
  except BaseException as e:
    if raise_errors:
      raise
    util.interpret_http_exception(e)  # log exception
    return self.error('Could not fetch source URL %s' % url)

  if self.entity:
    self.entity.html = fetched.text

  # .text is a decoded unicode string, .content is raw bytes. if the HTTP
  # headers didn't specify a charset, pass raw bytes to BeautifulSoup so it
  # can look for a <meta> tag with a charset and decode.
  text = (fetched.text if 'charset' in fetched.headers.get('content-type', '')
          else fetched.content)
  doc = util.beautifulsoup_parse(text)

  # parse microformats
  data = util.mf2py_parse(doc, fetched.url)

  # special case tumblr's markup: div#content > div.post > div.copy
  # convert to mf2 and re-parse
  if not data.get('items'):
    contents = doc.find_all(id='content')
    if contents:
      post = contents[0].find_next(class_='post')
      if post:
        post['class'] = 'h-entry'
        copy = post.find_next(class_='copy')
        if copy:
          copy['class'] = 'e-content'
        photo = post.find_next(class_='photo-wrapper')
        if photo:
          img = photo.find_next('img')
          if img:
            img['class'] = 'u-photo'
        doc = unicode(post)
        data = util.mf2py_parse(doc, fetched.url)

  logging.debug('Parsed microformats2: %s', json.dumps(data, indent=2))
  items = data.get('items', [])
  if require_mf2 and (not items or not items[0]):
    return self.error('No microformats2 data found in ' + fetched.url,
                      data=data, html="""
No <a href="http://microformats.org/get-started">microformats</a> or
<a href="http://microformats.org/wiki/microformats2">microformats2</a>
found in <a href="%s">%s</a>! See <a href="http://indiewebify.me/">indiewebify.me</a>
for details (skip to level 2, <em>Publishing on the IndieWeb</em>).
""" % (fetched.url, util.pretty_link(fetched.url)))

  return fetched, data
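# Sketch of the tumblr special case above, using BeautifulSoup directly
# (bridgy goes through util.beautifulsoup_parse; the markup is hypothetical).
# Tagging the divs with mf2 class names makes the re-parse find an h-entry.
def _sketch_tumblr_markup():
  import bs4

  html = ('<div id="content"><div class="post">'
          '<div class="copy">hello</div></div></div>')
  doc = bs4.BeautifulSoup(html, 'html.parser')
  post = doc.find_all(id='content')[0].find_next(class_='post')
  post['class'] = 'h-entry'
  post.find_next(class_='copy')['class'] = 'e-content'
  # unicode(post) (str(post) on Python 3) can now be re-parsed by mf2py,
  # which will see a single h-entry with an e-content
  assert 'h-entry' in str(post)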
def process_entry(source, permalink, feed_entry, refetch, preexisting,
                  store_blanks=True):
  """Fetches and processes an h-entry and saves a new
  :class:`models.SyndicatedPost`.

  Args:
    source: :class:`models.Source` subclass that this entry belongs to
    permalink: url of the unprocessed post
    feed_entry: the h-feed version of the h-entry dict, which often contains
      a partial version of the h-entry at the permalink
    refetch: boolean, whether to refetch and process entries we've seen before
    preexisting: list of previously discovered
      :class:`models.SyndicatedPost`\ s for this permalink
    store_blanks: boolean, whether we should store blank
      :class:`models.SyndicatedPost`\ s when we don't find a relationship

  Returns:
    a dict from syndicated url to a list of new
    :class:`models.SyndicatedPost`\ s
  """
  # if the post has already been processed, do not add to the results
  # since this method only returns *newly* discovered relationships.
  if preexisting:
    # if we're refetching and this one is blank, do not return.
    # if there is a blank entry, it should be the one and only entry,
    # but go ahead and check 'all' of them to be safe.
    if not refetch:
      return {}
    synds = [s.syndication for s in preexisting if s.syndication]
    if synds:
      logging.debug('previously found relationship(s) for original %s: %s',
                    permalink, synds)

  # first try with the h-entry from the h-feed. if we find the syndication url
  # we're looking for, we don't have to fetch the permalink
  permalink, _, type_ok = util.get_webmention_target(permalink)
  usynd = feed_entry.get('properties', {}).get('syndication', [])
  if usynd:
    logging.debug('u-syndication links on the h-feed h-entry: %s', usynd)
  results = _process_syndication_urls(source, permalink, set(
    url for url in usynd if isinstance(url, basestring)), preexisting)
  success = True

  if results:
    source.updates['last_feed_syndication_url'] = util.now_fn()
  elif not source.last_feed_syndication_url or not feed_entry:
    # fetch the full permalink page if we think it might have more details
    parsed = None
    try:
      logging.debug('fetching post permalink %s', permalink)
      if type_ok:
        resp = util.requests_get(permalink)
        resp.raise_for_status()
        parsed = util.mf2py_parse(resp.text, permalink)
    except AssertionError:
      raise  # for unit tests
    except BaseException:
      # TODO limit the number of allowed failures
      logging.info('Could not fetch permalink %s', permalink, exc_info=True)
      success = False

    if parsed:
      syndication_urls = set()
      relsynd = parsed.get('rels', {}).get('syndication', [])
      if relsynd:
        logging.debug('rel-syndication links: %s', relsynd)
      syndication_urls.update(url for url in relsynd
                              if isinstance(url, basestring))
      # there should only be one h-entry on a permalink page, but
      # we'll check all of them just in case.
      for hentry in (item for item in parsed['items']
                     if 'h-entry' in item['type']):
        usynd = hentry.get('properties', {}).get('syndication', [])
        if usynd:
          logging.debug('u-syndication links: %s', usynd)
        syndication_urls.update(url for url in usynd
                                if isinstance(url, basestring))
      results = _process_syndication_urls(source, permalink, syndication_urls,
                                          preexisting)

  # detect and delete SyndicatedPosts that were removed from the site.
  # materialize the chain into a list; an iterator would be exhausted by
  # the first membership test below.
  if success:
    result_syndposts = list(itertools.chain(*results.values()))
    for syndpost in list(preexisting):
      if syndpost.syndication and syndpost not in result_syndposts:
        logging.info('deleting relationship that disappeared: %s', syndpost)
        syndpost.key.delete()
        preexisting.remove(syndpost)

  if not results:
    logging.debug('no syndication links from %s to current source %s.',
                  permalink, source.label())
    results = {}
    if store_blanks and not preexisting:
      # remember that this post doesn't have syndication links for this
      # particular source
      logging.debug('saving empty relationship so that %s will not be '
                    'searched again', permalink)
      SyndicatedPost.insert_original_blank(source, permalink)

  # only return results that are not in the preexisting list
  new_results = {}
  for syndurl, syndposts_for_url in results.iteritems():
    for syndpost in syndposts_for_url:
      if syndpost not in preexisting:
        new_results.setdefault(syndurl, []).append(syndpost)

  if new_results:
    logging.debug('discovered relationships %s', new_results)
  return new_results
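# Sketch of process_entry's final filtering step: results from this pass
# minus relationships already stored. Stand-in strings replace the real
# models.SyndicatedPost entities; Python 2 iteritems matches the module.
def _sketch_new_results_filter():
  preexisting = ['synd-A']
  results = {'https://fa.ke/post': ['synd-A', 'synd-B']}

  new_results = {}
  for syndurl, syndposts in results.iteritems():
    for syndpost in syndposts:
      if syndpost not in preexisting:
        new_results.setdefault(syndurl, []).append(syndpost)

  assert new_results == {'https://fa.ke/post': ['synd-B']}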
def fetch_mf2(self, url):
  """Fetches a URL and extracts its mf2 data.

  Side effects: sets :attr:`entity`\ .html on success, calls :attr:`error()`
  on errors.

  Args:
    url: string

  Returns:
    (:class:`requests.Response`, mf2 data dict) on success, None on failure
  """
  try:
    fetched = util.requests_get(url)
    fetched.raise_for_status()
  except BaseException as e:
    util.interpret_http_exception(e)  # log exception
    return self.error('Could not fetch source URL %s' % url)

  if self.entity:
    self.entity.html = fetched.text

  # .text is a decoded unicode string, .content is raw bytes. if the HTTP
  # headers didn't specify a charset, pass raw bytes to BeautifulSoup so it
  # can look for a <meta> tag with a charset and decode.
  text = (fetched.text if 'charset' in fetched.headers.get('content-type', '')
          else fetched.content)
  doc = util.beautifulsoup_parse(text)

  # parse microformats, convert to ActivityStreams
  data = util.mf2py_parse(doc, fetched.url)

  # special case tumblr's markup: div#content > div.post > div.copy
  # convert to mf2 and re-parse
  if not data.get('items'):
    contents = doc.find_all(id='content')
    if contents:
      post = contents[0].find_next(class_='post')
      if post:
        post['class'] = 'h-entry'
        copy = post.find_next(class_='copy')
        if copy:
          copy['class'] = 'e-content'
        photo = post.find_next(class_='photo-wrapper')
        if photo:
          img = photo.find_next('img')
          if img:
            img['class'] = 'u-photo'
        doc = unicode(post)
        data = util.mf2py_parse(doc, fetched.url)

  logging.debug('Parsed microformats2: %s', json.dumps(data, indent=2))
  items = data.get('items', [])
  if not items or not items[0]:
    return self.error('No microformats2 data found in ' + fetched.url,
                      data=data, html="""
No <a href="http://microformats.org/get-started">microformats</a> or
<a href="http://microformats.org/wiki/microformats2">microformats2</a>
found in <a href="%s">%s</a>! See <a href="http://indiewebify.me/">indiewebify.me</a>
for details (skip to level 2, <em>Publishing on the IndieWeb</em>).
""" % (fetched.url, util.pretty_link(fetched.url)))

  return fetched, data
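# Sketch of the charset fallback above: when the response headers declare no
# charset, handing raw bytes to BeautifulSoup lets it discover the encoding
# from a <meta> tag (via its UnicodeDammit machinery). Markup is made up.
def _sketch_meta_charset():
  import bs4

  raw = u'<meta charset="utf-8"><p>caf\xe9</p>'.encode('utf-8')
  doc = bs4.BeautifulSoup(raw, 'html.parser')
  assert doc.p.get_text() == u'caf\xe9'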
def test_social_user_page_mf2(self):
  """Check the custom mf2 we render on social user pages."""
  self.sources[0].features = ['listen', 'publish']
  self.sources[0].put()

  # test invite with missing object and content
  resp = json.loads(self.responses[8].response_json)
  resp['verb'] = 'invite'
  resp.pop('object', None)
  resp.pop('content', None)
  self.responses[8].response_json = json.dumps(resp)

  # test that invites render the invitee, not the inviter
  # https://github.com/snarfed/bridgy/issues/754
  self.responses[9].type = 'rsvp'
  self.responses[9].response_json = json.dumps({
    'id': 'tag:fa.ke,2013:111',
    'objectType': 'activity',
    'verb': 'invite',
    'url': 'http://fa.ke/event',
    'actor': {
      'displayName': 'Mrs. Host',
      'url': 'http://fa.ke/host',
    },
    'object': {
      'objectType': 'person',
      'displayName': 'Ms. Guest',
      'url': 'http://fa.ke/guest',
    },
  })

  for entity in self.responses + self.publishes + self.blogposts:
    entity.put()

  user_url = self.sources[0].bridgy_path()
  response = app.application.get_response(user_url)
  self.assertEquals(200, response.status_int)

  parsed = util.mf2py_parse(response.body, user_url)
  hcard = parsed.get('items', [])[0]
  self.assertEquals(['h-card'], hcard['type'])
  self.assertEquals(['Fake User'], hcard['properties'].get('name'))
  self.assertEquals(['http://fa.ke/profile/url'],
                    hcard['properties'].get('url'))
  self.assertEquals(['enabled'],
                    hcard['properties'].get('bridgy-account-status'))
  self.assertEquals(['enabled'],
                    hcard['properties'].get('bridgy-listen-status'))
  self.assertEquals(['enabled'],
                    hcard['properties'].get('bridgy-publish-status'))

  expected_resps = self.responses[:10]
  for item, resp in zip(hcard['children'], expected_resps):
    self.assertIn('h-bridgy-response', item['type'])
    props = item['properties']
    self.assertEquals([resp.status], props['bridgy-status'])
    self.assertEquals([json.loads(resp.activities_json[0])['url']],
                      props['bridgy-original-source'])
    self.assertEquals(resp.unsent, props['bridgy-target'])

  # check invite
  html = response.body.decode('utf-8')
  self.assertIn('Ms. Guest is invited.', html)
  self.assertNotIn('Mrs. Host is invited.', html)

  publish = hcard['children'][len(expected_resps)]
  self.assertIn('h-bridgy-publish', publish['type'])
  props = publish['properties']
  self.assertEquals([self.publishes[0].key.parent().id()], props['url'])
  self.assertEquals([self.publishes[0].status], props['bridgy-status'])