def update_feedflow(feed, username):
    """Fetch a feed and merge its new entries into the datastore and memcache.

    The complete flow is persisted in the ``feed_db_info`` record for the
    feed; at most ``FEED_NUM_IN_CACHE`` of the leading (most recently
    prepended) entries are mirrored in memcache under the feed URL.

    Args:
        feed: Feed URL. Used for the datastore lookup and as the memcache key.
        username: Recorded as the subscriber when the memcache entry for this
            feed is created for the first time. Not used when the entry
            already exists.

    Returns:
        None. The function works through its datastore and memcache writes.
    """
    feed_content = feed_query(feed)

    # Load the stored flow for this feed, or start from an empty one.
    record = db.GqlQuery(
        "select * from feed_db_info where feed_url = :1", feed
    ).get()
    if record:
        # feed_flow may be empty on a record that was never filled in.
        feed_flow_db = json.loads(record.feed_flow) if record.feed_flow else []
        last_entry = record.last_entry
    else:
        feed_flow_db = []
        last_entry = ""

    if feed_content:
        # Collect the entries that precede the one stored as ``last_entry``.
        # Index 3 of an entry is the value compared (the entry URL according
        # to the original author's note -- confirm against feed_query).
        new_items = []
        for item in feed_content:
            if item[3] == last_entry:
                break
            new_items.append(item)
        feed_flow_db = new_items + feed_flow_db  # new entries go to the left
        last_entry = feed_content[0][3]
    # When nothing was fetched, last_entry is deliberately left untouched:
    # resetting it would make the next successful fetch re-add every entry.

    # Persist the full flow first.
    if record:
        record.feed_flow = json.dumps(feed_flow_db)
        record.last_entry = last_entry
    else:
        record = feed_db_info(
            feed_url=feed,
            feed_flow=json.dumps(feed_flow_db),
            last_entry=last_entry,
        )
    record.put()

    # Mirror the head of the flow in memcache. Both branches below use the
    # same slice; the creation branch used to be sized by the fetched feed
    # minus one, which dropped an entry (or failed when nothing was fetched).
    cached_flow = feed_flow_db[:FEED_NUM_IN_CACHE]
    cached = memcache.get(feed)
    if cached:
        feed_info_cache = json.loads(cached)
        feed_info_cache["cached_flow"] = cached_flow
        feed_info_cache["last_query"] = util.serialize_time(datetime.now())
    else:
        # First time this feed is cached: build the whole structure.
        feed_info_cache = {
            "last_query": TIME_INITIAL,
            "subscriber": {
                "username": username,
                "unread": INITIAL_UNREAD_ITEM_NUM,
                "last_read": "",
            },
            "cached_flow": cached_flow,
        }
    memcache.set(feed, json.dumps(feed_info_cache))
import collections
from datetime import datetime

# Put the local "lib" directory on the import path before feedparser is
# imported (presumably feedparser is bundled there -- confirm).
sys.path.append(os.path.abspath("lib"))
import feedparser as fp

# Configure feedparser's HTML sanitizer to accept embedded-object tags so
# that <object>, <embed> and <iframe> elements are kept in feed content.
fp._HTMLSanitizer.acceptable_elements.add("object")
fp._HTMLSanitizer.acceptable_elements.add("embed")
fp._HTMLSanitizer.acceptable_elements.add("iframe")

# Global variables section.
# Initial "unread" value given to a subscriber by update_feedflow when a cache
# entry is created. NOTE(review): this is a string, not an int.
INITIAL_UNREAD_ITEM_NUM = "10"
# Presumably the minimum number of seconds between two queries of the same
# feed; its use is not visible here -- confirm against callers.
FEED_QUERY_GAP_SECONDS = 1200
# Upper bound update_feedflow applies when slicing the flow kept in memcache.
FEED_NUM_IN_CACHE = 100
# Serialized placeholder timestamp (1900-01-01 00:00:00.1) that
# update_feedflow stores as "last_query" in a newly created cache entry.
TIME_INITIAL = util.serialize_time(datetime.strptime("1900-01-01 00:00:00.1000", "%Y-%m-%d %H:%M:%S.%f"))


# User section: datastore model holding per-user information, i.e. the feeds
# a user has added (entries change as the user adds or deletes a feed).
class rss_user_data(db.Model):
    username = db.StringProperty(required=True)
    # Can be empty at the initial stage. Storage format is not visible here.
    rss_feeds = db.TextProperty()


# Feed section: datastore model storing the entire feed flow for each feed.
class feed_db_info(db.Model):
    feed_url = db.StringProperty(required=True)
    # JSON-encoded list of feed entries (update_feedflow writes json.dumps of
    # the flow); can be empty at the initial stage.
    feed_flow = db.TextProperty()
    # Index-3 value of the newest entry seen on the last successful fetch;
    # update_feedflow uses it to detect where already-stored entries begin.
    last_entry = db.StringProperty()


def add_feed(username, feed):