def update_feed(url, row_proxy):
    """
    Fetches new stories for the feed at :url: and returns (feed, stories)
    """
    row = {'fetched': datetime.now()}
    if row_proxy:
        # We already track this feed, so send its etag/modified values and
        # start from the stored column values
        parsed = feedparser.parse(url, etag=row_proxy.etag,
                                  modified=row_proxy.modified)
        for column in row_proxy.keys():
            row[column] = row_proxy[column]
    else:
        parsed = feedparser.parse(url)

    # version is empty when the url is not a feed or when we give
    # modified/etag values and the feed hasn't changed
    if not parsed.version:
        print '{0} is not a feed or hasn\'t changed'.format(url)
        return None
    # 410 Gone: the feed has been taken down for good
    if parsed.status == 410:
        print '{0} has been permanently shut down'.format(url)
        return None
    if parsed.status == 301:
        print '{0} moved permanently to {1}'.format(url, parsed.url)
        conn = engine.connect()
        conn.execute(feeds.update().where(feeds.c.url == url)
                          .values(url=parsed.url))
        conn.close()
        url = parsed.url

    row['url'] = url
    row['title'] = parsed.feed.get('title', row.get('title'))
    row['etag'] = unicode(parsed.get('etag'))
    row['modified'] = unicode(parsed.get('modified'))
    parsed_time = parsed.feed.get('modified_parsed',
                                  parsed.feed.get('updated_parsed'))
    row['updated'] = time_to_datetime(parsed_time) or row['fetched']

    story_rows = {}
    for entry in parsed.entries:
        story_row = _make_story_row(row['fetched'], entry)
        story_rows[story_row['uid']] = story_row
    return (row, story_rows)
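
# Usage sketch (not part of the original functions): look up the stored row
# for a url and pass it to update_feed so feedparser can send etag/modified
# headers. Assumes the module-level `engine` and `feeds` objects used above.
def fetch_one_feed(url):
    conn = engine.connect()
    stored = conn.execute(select([feeds], feeds.c.url == url)).fetchone()
    conn.close()
    # stored is None for a brand new feed; update_feed returns None when the
    # url isn't a feed, hasn't changed, or has gone away
    return update_feed(url, stored)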
def write_feed(row, stories, connection):
    """
    Update the feed if it has been modified since it was last updated
    or if it's a new feed
    """
    s = select([feeds], feeds.c.url == row['url'])
    old_row = connection.execute(s).fetchone()
    if old_row is None:
        connection.execute(feeds.insert(), **row)
        old_row = connection.execute(s).fetchone()
    if old_row.fetched < row['updated']:
        print 'Updating feed ' + row['url']
        for story in stories.itervalues():
            story['feed_id'] = old_row.id
            write_story(story, connection)
        u = feeds.update().where(feeds.c.id == old_row.id).values(**row)
        connection.execute(u)
        return True
    return False
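
# A possible refresh loop tying update_feed and write_feed together: a
# sketch under the assumption that every tracked feed lives in the `feeds`
# table and that one connection can be reused for the whole pass.
def refresh_all_feeds():
    conn = engine.connect()
    for feed_row in conn.execute(select([feeds])).fetchall():
        result = update_feed(feed_row.url, feed_row)
        if result is None:
            continue        # nothing new, or the url is no longer a feed
        row, stories = result
        write_feed(row, stories, conn)
    conn.close()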