def _find_feed_items(mf2):
    """Extract feed items from given microformats2 data.

    If the data contains one or more h-feed entries (as found by
    ``mf2util.find_all_entries``), the children of every h-feed are
    concatenated in order and returned. Otherwise the top-level items are
    returned. In both cases at most ``MAX_FEED_ENTRIES`` items are returned.

    Args:
      mf2: dict, parsed mf2 data; must have an ``'items'`` key

    Returns:
      list of dicts, each one representing an mf2 h-* item
    """
    feeditems = mf2['items']

    hfeeds = mf2util.find_all_entries(mf2, ('h-feed',))
    if hfeeds:
        # An h-feed without a 'children' key simply contributes no items.
        feeditems = [
            child
            for hfeed in hfeeds
            for child in hfeed.get('children', [])
        ]
    else:
        logger.debug('No h-feed found, fallback to top-level h-entrys.')

    if len(feeditems) > MAX_FEED_ENTRIES:
        # Pass values as logger arguments so the message is only formatted
        # when the INFO level is actually enabled.
        logger.info(
            'Feed has %s entries! only processing the first %s.',
            len(feeditems), MAX_FEED_ENTRIES)
        feeditems = feeditems[:MAX_FEED_ENTRIES]

    return feeditems
def _find_feed_items(feed_url, feed_doc):
    """Parse a document as microformats2 and return its feed items.

    The document is parsed with ``util.mf2py_parse``. When the parsed data
    contains one or more h-feed entries, the children of every h-feed are
    concatenated in order; otherwise the top-level items are returned. At
    most ``MAX_FEED_ENTRIES`` items are returned.

    Args:
      feed_url: a string, the URL passed to the mf2py parser
      feed_doc: a string or BeautifulSoup object, the document passed to the
        mf2py parser

    Returns:
      a list of dicts, each one representing an mf2 h-* item
    """
    mf2 = util.mf2py_parse(feed_doc, feed_url)
    items = mf2['items']

    feeds = mf2util.find_all_entries(mf2, ('h-feed',))
    if not feeds:
        logging.debug('No h-feed found, fallback to top-level h-entrys.')
    else:
        # Flatten the children of all h-feeds, preserving document order.
        items = [child for feed in feeds for child in feed.get('children', [])]

    # Common case: nothing to trim.
    if len(items) <= MAX_FEED_ENTRIES:
        return items

    logging.info('%s has %s entries! only processing the first %s.',
                 feed_url, len(items), MAX_FEED_ENTRIES)
    return items[:MAX_FEED_ENTRIES]
def _find_feed_items(feed_url, feed_doc):
    """Return the mf2 feed items found in a document.

    ``feed_doc`` is parsed via ``util.mf2py_parse`` using ``feed_url``. If
    the result has any h-feed entries, the items are the combined children
    of those h-feeds; if it has none, the items are the parsed top-level
    items. The result is capped at ``MAX_FEED_ENTRIES`` entries.

    Args:
      feed_url: a string, the URL passed to the mf2py parser
      feed_doc: a string or BeautifulSoup object, the document passed to the
        mf2py parser

    Returns:
      a list of dicts, each one representing an mf2 h-* item
    """
    parsed_doc = util.mf2py_parse(feed_doc, feed_url)
    entries = parsed_doc['items']
    feed_entries = mf2util.find_all_entries(parsed_doc, ('h-feed',))

    if feed_entries:
        entries = []
        for feed_entry in feed_entries:
            # A feed with no 'children' key adds nothing.
            entries.extend(feed_entry.get('children', []))
    else:
        logging.debug('No h-feed found, fallback to top-level h-entrys.')

    entry_count = len(entries)
    if entry_count > MAX_FEED_ENTRIES:
        logging.info('%s has %s entries! only processing the first %s.',
                     feed_url, entry_count, MAX_FEED_ENTRIES)
        entries = entries[:MAX_FEED_ENTRIES]

    return entries
def _find_feed_items(feed_url, feed_doc):
    """Parse a document with mf2py and return its feed items.

    When the parsed data contains one or more h-feed entries, the children
    of every h-feed are concatenated in order and returned. Otherwise the
    parsed top-level items are returned unchanged.

    Args:
      feed_url: a string, the URL passed to the mf2py parser
      feed_doc: a string or BeautifulSoup object, the document passed to the
        mf2py parser

    Returns:
      a list of dicts, each one representing an mf2 h-* item
    """
    mf2 = mf2py.parse(url=feed_url, doc=feed_doc)
    top_level_items = mf2['items']

    feeds = mf2util.find_all_entries(mf2, ('h-feed',))
    if not feeds:
        logging.debug('No h-feed found, fallback to top-level h-entrys.')
        return top_level_items

    # Flatten the children of all h-feeds, preserving document order.
    return [child for feed in feeds for child in feed.get('children', [])]
def representative_card(parsed, source_url): """ Find the representative card for a URL. http://microformats.org/wiki/representative-h-card-parsing :param dict parsed: an mf2 parsed dict :param str source_url: the source of the parsed document. :return: the representative h-card if one is found """ # FIXME NOTE mf2py needs plain mf2json parsed = json.loads(JSONEncoder().encode(parsed)) # FIXME source_url = uri.parse(source_url).minimized cards = [ h for h in util.find_all_entries(parsed, ["h-card"], include_properties=True) if ( h["properties"].get("name", [""])[0] or h["properties"].get("nickname", [""])[0] ) ] if match := _check_uid_and_url_match_source_url(source_url, cards): return match