def parse(content, url):
    """Produce a python representation of the RSS feed content given.

    This representation is documented at: http://feedparser.org

    content - string containing RSS/atom/etc xml document
    url - the url that the content was retrieved from.

    raises: InvalidFeedError if no feed could be parsed.
    """
    # Fake response headers so feedparser knows the document's origin
    # and can resolve relative links / choose an encoding.
    fake_headers = {
        'content-location': url,
        'content-type': 'text/xml; charset=utf-8',
    }
    ff = feedparser.parse(content, header_defaults=fake_headers)

    if ff is None or 'feed' not in ff:
        raise InvalidFeedError()

    # make sure the feed has an id...
    if 'id' not in ff.feed:
        ff.feed['id'] = url

    # make sure the feed has a self referential link
    ff.feed.setdefault('links', [])
    has_self_ref = any(link.rel == 'self' for link in ff.feed.links)
    if not has_self_ref:
        ff.feed.links.append(FakeLink(rel='self', href=url, title=''))

    for e in ff.get('entries', []):
        # make sure each entry has an id
        eid = e.get('id', None)
        if eid is None:
            eid = find_best_entry_id(e)
            if eid is None:
                # throw this entry out, it has no id, title, summary or
                # content that is recognizable...
                continue
        e['id'] = eid

    return ff
def _feed_info(request, query):
    """Fetch the feed at *query* and return summary info about it.

    request - the current request object (provides context.config for
              building an http client)
    query - the url of the feed to verify, or None

    Returns a dict {'url': ..., 'title': ...} when the url responds with
    HTTP 200 and parses as a valid feed; otherwise returns None (also on
    any fetch/parse error, which is logged).
    """
    if query is None:
        return None

    client = http.create_client(request.context.config)
    try:
        headers = {'Connection': 'close'}
        response, content = client.request(query, headers=headers)
        if response.status != 200:
            return None

        ff = feedparser.parse(content)
        # 'bozo_exception' present means feedparser hit a parse problem
        if ff and 'feed' in ff and 'bozo_exception' not in ff:
            return {'url': query, 'title': ff.feed.get('title', '')}
        return None
    except Exception:
        # narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are not swallowed; this is deliberately best-effort, so log
        # the failure and report "no feed info"
        log.error("Error verifying feed at %s: %s" % (query, traceback.format_exc()))
        return None
    finally:
        # always release the client's connections
        http.close_all(client)