def generate_feed_entries(url, **kw):
    """
    generator; parses the feed at url and yields one entry per feed item

    keywords:
        max - max number of entries to yield; when omitted (or falsy,
              e.g. 0) every usable entry is yielded

    yields:
        each FeedEntry instance, populated with title, link, raw_content
        and stripped_content, plus the document's etag / modified values
        when the parsed document has them

    raises:
        FeedException - when the parsed document has its bozo flag set
                        (wraps doc.bozo_exception)

    Entries that have neither content nor a non-empty summary are skipped
    and do not count towards max.
    """
    logging.debug("generate_feed_entries url:%s", url)
    max_entries = kw.get("max", None)

    doc = feedparser.parse(url)
    if doc.bozo:
        raise FeedException(doc.bozo_exception)

    count = 0
    for e in doc.entries:
        title = e.title
        logging.debug("generate_feed_entries processing entry '%s'", title)

        entry = FeedEntry(title=title, link=e.link)
        if hasattr(doc, "etag"):
            entry.set(etag=doc.etag)
        if hasattr(doc, "modified"):
            entry.set(modified=doc.modified)

        # Both attributes are optional on an entry; getattr with a default
        # avoids an AttributeError when one is missing, and the truthiness
        # check avoids an IndexError on an empty content list.
        content = getattr(e, "content", None)
        summary = getattr(e, "summary", None)
        if content:
            raw_content = content[0]['value']
        elif summary:
            raw_content = summary
        else:
            logging.debug("no content found")
            continue

        # unescapes entities
        raw_content = unescape(raw_content)
        stripped_content = unicode(strip_html(raw_content))
        entry.set(raw_content=raw_content)
        entry.set(stripped_content=stripped_content)
        yield entry

        count += 1
        if max_entries and count >= max_entries:
            break
def test_unescape_no_entities(self):
    """unescape() must return a string without entities unchanged."""
    s = "hi"
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(s, unescape(s))
def test_unescape_with_entities(self):
    """Check unescape() against the expected decoded strings."""
    # NOTE(review): the inputs below contain no entity references, so as
    # written this does not exercise entity decoding. Presumably they were
    # entity-encoded originally (e.g. "&lt;") -- confirm and restore.
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual("<", unescape("<"))
    self.assertEqual("abc", unescape("abc"))