def checkItems_(self, sender):
    NSLog("Checking items")
    if self.user is None or self.password is None:
        return
    # Retrieve all reading-list items
    ca = ClientAuthMethod(self.user, self.password)
    reader = GoogleReader(ca)
    reader.makeSpecialFeeds()
    specials = reader.getSpecialFeed(ReaderUrl.READING_LIST)
    specials.loadItems()
    if self.lastId is None:
        # First run: remember the newest item and notify about it
        # (guard against an empty reading list).
        if specials.items:
            item = specials.items[0]
            self.lastId = item.id
            self.notifyNewItem(item)
    else:
        if len(specials.items) > 0:
            lastItem = specials.items[0].id
            # Notify about every item newer than the one seen last time.
            for item in specials.items:
                if item.id == self.lastId:
                    break
                self.notifyNewItem(item)
            self.lastId = lastItem
    NSLog("Finished checking items")
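The method above is a PyObjC timer callback: the trailing underscore in checkItems_ maps to the Objective-C selector 'checkItems:'. A minimal sketch of how such a poller might be scheduled from an app delegate; the AppDelegate class, the 300-second interval and the timer wiring are assumptions, not part of the original snippet.

from Foundation import NSObject, NSTimer

class AppDelegate(NSObject):

    def applicationDidFinishLaunching_(self, notification):
        # Poll the reading list every 300 seconds (interval is an
        # assumption). 'checkItems_' is reachable from Objective-C
        # as the selector 'checkItems:'.
        self.timer = NSTimer.scheduledTimerWithTimeInterval_target_selector_userInfo_repeats_(
            300.0, self, 'checkItems:', None, True)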
def test_reading_list_exists(self):
    ca = ClientAuthMethod(username, password)
    reader = GoogleReader(ca)
    reader.makeSpecialFeeds()
    feeds = reader.getFeedContent(
        reader.getSpecialFeed(ReaderUrl.READING_LIST))
    self.assertEqual(dict, type(feeds))
    list_match = re.search('reading list in Google Reader', feeds['title'])
    self.assertTrue(list_match)
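test_reading_list_exists follows the shape of libgreader's own test suite: it assumes module-level username and password variables and lives in a unittest.TestCase subclass. A minimal harness under those assumptions, with placeholder credentials:

import re
import unittest

from libgreader import ClientAuthMethod, GoogleReader, ReaderUrl

# Placeholder credentials -- substitute a real (historical) Google account.
username = 'someuser@gmail.com'
password = 'not-a-real-password'

class TestSpecialFeeds(unittest.TestCase):
    pass  # test_reading_list_exists above would be defined here

if __name__ == '__main__':
    unittest.main()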
def import_google_reader_begin(user_id, access_token):

    auth = OAuth2Method(settings.GOOGLE_OAUTH2_CLIENT_ID,
                        settings.GOOGLE_OAUTH2_CLIENT_SECRET)
    auth.authFromAccessToken(access_token)
    reader = GoogleReader(auth)

    django_user, mongo_user = get_user_from_dbs(user_id)
    username = django_user.username

    try:
        user_infos = reader.getUserInfo()

    except TypeError:
        LOGGER.exception(u'Could not start Google Reader import '
                         u'for user %s.', username)
        # Don't refresh, it's now done by a dedicated periodic task.
        # If we failed, it means the problem is quite serious.
        # import_google_reader_trigger(user_id, refresh=True)
        return

    GR_MAX_FEEDS = config.GR_MAX_FEEDS

    LOGGER.info(u'Starting Google Reader import for user %s.', username)

    gri = GoogleReaderImport(user_id)

    # Take note of the user information now that we have it.
    gri.start(user_infos=user_infos)

    reader.buildSubscriptionList()

    total_reads, reg_date = reader.totalReadItems(without_date=False)
    total_starred, star1_date = reader.totalStarredItems(without_date=False)
    total_feeds = len(reader.feeds) + 1  # +1 for 'starred'

    gri.reg_date(pytime.mktime(reg_date.timetuple()))
    gri.star1_date(pytime.mktime(star1_date.timetuple()))
    gri.total_reads(total_reads)
    gri.total_starred(total_starred)

    LOGGER.info(u'Google Reader import for user %s: %s feed(s) and %s read '
                u'article(s) to go…', username, total_feeds, total_reads)

    if total_feeds > GR_MAX_FEEDS and not settings.DEBUG:
        mail_admins('User {0} has more than {1} feeds: {2}!'.format(
                        username, GR_MAX_FEEDS, total_feeds),
                    u"\n\nThe GR import will be incomplete.\n\n"
                    u"Just for you to know…\n\n")

    # We launch the starred feed import first; launching it after the
    # standard feeds would delay it until the world's end.
    reader.makeSpecialFeeds()
    starred_feed = reader.getSpecialFeed(ReaderUrl.STARRED_LIST)
    import_google_reader_starred.apply_async(
        (user_id, username, starred_feed), queue='low')

    processed_feeds = 1
    feeds_to_import = []

    for gr_feed in reader.feeds[:GR_MAX_FEEDS]:
        try:
            feed = create_feed(gr_feed, mongo_user)

        except Feed.DoesNotExist:
            LOGGER.exception(u'Could not create feed “%s” for user %s, '
                             u'skipped.', gr_feed.title, username)
            continue

        processed_feeds += 1
        feeds_to_import.append((user_id, username, gr_feed, feed))

        LOGGER.info(u'Imported feed “%s” (%s/%s) for user %s…',
                    gr_feed.title, processed_feeds, total_feeds, username)

    # We need to clamp the total, else the task won't finish when the
    # user has more feeds than allowed.
    gri.total_feeds(min(processed_feeds, GR_MAX_FEEDS))

    for feed_args in feeds_to_import:
        import_google_reader_articles.apply_async(feed_args, queue='low')

    LOGGER.info(u'Imported %s/%s feeds in %s. Articles import already '
                u'started with limits: date: %s, %s waves of %s articles, '
                u'max articles: %s, reads: %s, starred: %s.',
                processed_feeds, total_feeds,
                naturaldelta(now() - gri.start()),
                naturaltime(max([gri.reg_date(), GR_OLDEST_DATE])),
                config.GR_WAVE_LIMIT, config.GR_LOAD_LIMIT,
                config.GR_MAX_ARTICLES,
                total_reads, total_starred)
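import_google_reader_begin fans work out with apply_async() on a 'low' queue, which is Celery's calling convention. A rough sketch of how the downstream tasks might be declared; only the task names and the queue='low' routing come from the code above, the decorator style and the (empty) bodies are assumptions:

from celery import task

@task
def import_google_reader_starred(user_id, username, starred_feed):
    # Walk the starred feed wave by wave (body omitted).
    pass

@task
def import_google_reader_articles(user_id, username, gr_feed, feed):
    # Fetch and store this feed's articles (body omitted).
    pass

# Assuming import_google_reader_begin is itself a task, one user's
# import would be started with something like:
#     import_google_reader_begin.delay(user_id, access_token)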
class HooverReader(object):
    '''
    Export everything that was saved in Google Reader as JSON objects.

    Keep as much information as possible, but especially the ID (useful
    for cross-referencing), title, url, notes (probably gone) and read
    status. Each file should contain entries for just one category/tag.
    The list of categories (folders in Google Reader) should be stored
    in categories.json.

    The script has no memory and will always fetch everything (it does
    not do incremental updates). It will NOT save the list of feeds,
    since those can be exported as OPML from Google Reader. If it hits
    the rate limit, it will pause for up to half an hour before giving up.

    DILEMMAS:
    - Should we save feeds' contents? How far back?
    - Should we save categories' contents? How far back? (probably;
      categories can contain entries labeled with the category label
      that are not otherwise tagged and hence not backed up)

    Save:
    - all tagged entries (labeled feeds; categories that don't contain feeds)
    - list of categories with the feeds they contain

    Algorithm:
    - fetch a list of categories
    - fetch a list of all labels (which includes categories)
    - for every label which is not a category:
      - loadItems
      - execute loadMoreItems until the item count stays the same (or error)
      - dump the data as JSON to a file
    - for every category, fetch the list of feeds it contains
    - dump the list of categories with feeds as JSON to a file
    '''

    def __init__(self, username, password):
        self.auth = ClientAuthMethod(username, password)
        self.reader = GoogleReader(self.auth)
        # Fetch the list of special feeds (starred, reading list, ...)
        self.reader.makeSpecialFeeds()
        # Fetch the list of feeds AND categories
        self.reader.buildSubscriptionList()
        self.categories = self.reader.categories
        # Make copies so lib calls don't fill them with crap
        self.feeds = self.reader.feeds[:]
        self.specialFeeds = self.reader.specialFeeds.copy()

    def __create_feed_filename(self, feed_label):
        return "{0}.json".format(feed_label)

    def get_tags(self):
        tags_json = self.reader.httpGet(
            'https://www.google.com/reader/api/0/tag/list',
            {'output': 'json'})
        tags = json.loads(tags_json)
        self.tags = tags['tags']

    def load_items(self, feed):
        fetch_size = 1000
        tryagain = 0
        feed.loadItems(loadLimit=fetch_size)
        # Keep fetching while full batches keep coming back, or while we
        # are retrying after an error (at most 5 attempts).
        while (feed.lastLoadLength > 0 and feed.lastLoadLength == fetch_size) \
                or (tryagain > 0 and tryagain < 5):
            feed.loadMoreItems(loadLimit=fetch_size)
            if not feed.lastLoadOk:
                print "Error fetching items for feed '{0}'".format(feed.title)
                # Double the time to sleep on each failed attempt
                pause_for = PAUSE_INTERVAL * (2 ** tryagain)
                print "Pausing for {0} minute(s)...".format(pause_for / 60)
                time.sleep(pause_for)
                tryagain += 1
            else:
                tryagain = 0
        return feed.items

    def process_item(self, item):
        values = {}
        keys = ('id', 'title', 'content', 'read', 'starred', 'shared', 'url')
        for key in keys:
            values[key] = getattr(item, key, u'')
        values['origin'] = getattr(item, 'origin', {})
        return values

    def get_feed_info(self, feed):
        feed_obj = {
            'feed_id': feed.id,
            'title': feed.title,
            'site_url': getattr(feed, "siteUrl", ""),
            'feed_url': getattr(feed, "feedUrl", ""),
            # Unix timestamp; updated when the feed is fetched
            'last_updated': feed.lastUpdated,
        }
        return feed_obj

    def save_to_file(self, filename, obj, subdir=None):
        save_dir = BACKUP_DIR
        if subdir:
            save_dir = join(BACKUP_DIR, subdir)
        if not os.path.exists(save_dir):
            try:
                os.makedirs(save_dir)
            except OSError:  # Could not create it
                print 'Could not create backup directory {0}. Exiting.'.format(
                    save_dir)
                sys.exit(1)
        obj_json = toJSON(obj)
        fname = join(save_dir, filename)
        with open(fname, 'w') as f:
            f.write(obj_json)

    def save_feed(self, feed, subdir=None):
        items = []
        print 'Saving:', feed.title.encode('utf-8')
        try:
            raw_items = self.load_items(feed)
        except Exception:
            print 'Failed. Moving on...'
            print
            return
        for item in raw_items:
            items.append(self.process_item(item))
        feed_obj = self.get_feed_info(feed)
        feed_obj['items'] = items
        feed_obj['items_count'] = len(items)
        self.save_to_file(self.__create_feed_filename(feed.title),
                          feed_obj, subdir)

    def process_category(self, category):
        cat = {
            'id': category.id,
            'title': category.label,
        }
        cat['feeds'] = [self.get_feed_info(feed) for feed in category.feeds]
        return cat

    def save_tag(self, tag):
        cat = {
            'id': tag.id,
            'title': tag.label,
        }
        print 'Saving:', tag.label.encode('utf-8')
        cat['items'] = [self.process_item(item)
                        for item in self.load_items(tag)]
        cat['items_count'] = len(cat['items'])
        self.save_to_file(self.__create_feed_filename(cat['title']),
                          cat, 'tags')

    def save_categories(self):
        categories = {
            'title': 'Google Reader Categories',
            'categories': [self.process_category(cat)
                           for cat in self.categories],
        }
        if categories['categories']:
            self.save_to_file("categories.json", categories)
        else:
            print 'There are no categories to save.'

    def save_feed_list(self):
        feeds = {
            'title': 'Google Reader List of Feeds',
            'feeds': [self.get_feed_info(feed) for feed in self.feeds],
        }
        if feeds['feeds']:
            self.save_to_file("feeds.json", feeds)
        else:
            print 'There are no feeds to save.'

    def backup(self):
        if getattr(settings, 'SAVE_TAGS', True):
            print "Saving tags..."
            self.get_tags()
            for tag in self.tags:  # A tag is really a category
                try:
                    label = tag['id'].rsplit('label/')[1]
                except IndexError:
                    # Special feeds (state/); skip, they are handled separately
                    continue
                ctag = Category(self.reader, label, tag['id'])
                self.save_tag(ctag)

        if getattr(settings, 'SAVE_FEEDS', False):
            print "Saving feeds..."
            for feed in self.feeds:
                self.save_feed(feed, 'feeds')

        print "Saving special feeds..."
        if getattr(settings, 'SAVE_SPECIAL_FEEDS_ALL', False):
            sf_keys = self.specialFeeds.keys()
        else:
            sf_keys = ('starred', )
        for feed_name in sf_keys:
            feed = self.specialFeeds[feed_name]
            self.save_feed(feed, 'special')

        if getattr(settings, 'SAVE_CATEGORIES', True):
            print "Saving list of feeds and categories..."
            self.save_feed_list()
            self.save_categories()
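A minimal sketch of how HooverReader might be driven. The prompt-based credential collection is an assumption; BACKUP_DIR, PAUSE_INTERVAL, toJSON and the settings object used above are expected to be defined alongside the class. Python 2, matching the class's print statements:

if __name__ == '__main__':
    import getpass

    # Placeholder prompts -- any way of supplying credentials works.
    username = raw_input('Google username: ')
    password = getpass.getpass('Google password: ')

    HooverReader(username, password).backup()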