def handle_feed(self, resource):
    """Render ``resource`` as an RSS feed of its most recently published children.

    Queries ``model.Resource`` for entities whose class is one of
    ``resource.resource_types`` (newest ``publication_date`` first, passing
    10 to ``fetch``), renders each one through its ``<classname>_post.html``
    template, and rewrites root-relative ``<a href>`` / ``<img src>`` URLs in
    the rendered body to absolute URLs on ``self.request.host_url`` so they
    still resolve outside the site.

    Args:
        resource: The feed resource. Its ``title``, ``body``, ``path``,
            ``modification_date`` and ``resource_types`` attributes are read.

    Returns:
        A ``Representation`` with content type ``application/rss+xml``
        wrapping the feed XML.
    """
    # TODO - model attribute for item count?
    # TODO - model attributes for other hard-coded values?
    author = "[email protected] (Jason DeFontes)"
    host_url = self.request.host_url

    # NOTE(review): type names are interpolated directly into the GQL string;
    # this assumes they never contain a single quote -- confirm.
    types = ",".join(["'%s'" % t for t in resource.resource_types])
    children = model.Resource.gql(
        "WHERE class IN (" + types + ") ORDER BY publication_date DESC"
    ).fetch(10)

    # The feed's pub_date is the newest modification among the feed resource
    # itself and all of the children being listed.
    last_modified = max(
        [resource.modification_date] + [c.modification_date for c in children]
    )

    feed = rss.RssFeed(
        title=resource.title,
        description=resource.body,
        link=host_url + "/",
        copyright="Copyright 2009 Jason DeFontes",
        email=author,
        pub_date=last_modified,
        rss_link=host_url + resource.path,
    )

    # Compiled once instead of once per child. Matches root-relative paths
    # ("/foo") but deliberately NOT protocol-relative URLs ("//host/foo"):
    # prefixing the latter with host_url would point them at the wrong place.
    local_url = re.compile(r'^/(?!/)')

    for c in children:
        body = self.render_template(
            c.class_name().lower() + "_post.html", {"child": c}
        )

        # fix links to local URLs in body
        soup = BeautifulSoup(body, fromEncoding='utf-8')
        for link in soup.findAll('a', href=local_url):
            link['href'] = host_url + link['href']
        for img in soup.findAll('img', src=local_url):
            img['src'] = host_url + img['src']

        feed.add_item(
            title=c.title,
            description=unicode(soup),
            link=host_url + c.path,
            author=author,
            pub_date=c.publication_date,
            guid=c.uuid,
        )

    # NOTE(review): the meaning of the third positional argument (True) is
    # defined by Representation, which is not visible here.
    return Representation("application/rss+xml", feed.to_xml(), True)
def strip_tags(text, valid_tags=None):
    """Strip tags from ``text``, keeping only whitelisted tags and attributes.

    HTML comments are removed. Every tag whose name is not a key of
    ``valid_tags`` is hidden (its markup is dropped, its contents are kept).
    For whitelisted tags, only attributes listed in ``valid_tags[tag.name]``
    survive, and any ``javascript:`` occurrence is removed from their values.

    example::

        >>> strip_tags('this <a href="">xxx</a>')
        u'this xxx'
        >>> strip_tags('this <a href="">xxx</a>',{'a':'href'})
        u'this <a href="">xxx</a>'

    Args:
        text: The HTML markup to clean.
        valid_tags: Mapping of allowed tag name -> allowed attribute names.
            Defaults to no allowed tags. Note that when the mapped value is a
            plain string (as in the example above), ``in`` performs a
            substring test, so pass a list/tuple/set for exact matching.

    Returns:
        The cleaned markup, decoded from UTF-8.
    """
    import re

    from app.BeautifulSoup import BeautifulSoup, Comment

    if valid_tags is None:
        valid_tags = {}

    js_scheme = re.compile(r'javascript:', re.IGNORECASE)

    def scrub(value):
        # Valueless attributes may be parsed as None; leave those untouched.
        if value is None:
            return value
        # Strip repeatedly and case-insensitively: a single pass lets
        # "javajavascript:script:" collapse back into "javascript:", and a
        # case-sensitive match misses "JavaScript:". This is still not a
        # complete URL sanitizer (e.g. entity- or whitespace-obfuscated
        # schemes are not decoded here).
        previous = None
        while previous != value:
            previous = value
            value = js_scheme.sub('', value)
        return value

    soup = BeautifulSoup(text)

    for comment in soup.findAll(text=lambda node: isinstance(node, Comment)):
        comment.extract()

    for tag in soup.findAll(True):
        if tag.name in valid_tags:
            valid_attrs = valid_tags[tag.name]
            tag.attrs = [
                (attr, scrub(val))
                for attr, val in tag.attrs
                if attr in valid_attrs
            ]
        else:
            # Hidden tags render their children but not their own markup.
            tag.hidden = True

    return soup.renderContents().decode('utf8')