def count_unreads_for_subscribers(self, feed):
    user_subs = UserSubscription.objects.filter(
        feed=feed, active=True, user__profile__last_seen_on__gte=feed.unread_cutoff
    ).order_by("-last_read_date")
    if not user_subs.count():
        return

    for sub in user_subs:
        if not sub.needs_unread_recalc:
            sub.needs_unread_recalc = True
            sub.save()

    if self.options["compute_scores"]:
        r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
        stories = MStory.objects(story_feed_id=feed.pk, story_date__gte=feed.unread_cutoff)
        stories = Feed.format_stories(stories, feed.pk)
        story_hashes = r.zrangebyscore(
            "zF:%s" % feed.pk,
            int(feed.unread_cutoff.strftime("%s")),
            int(time.time() + 60 * 60 * 24),
        )
        missing_story_hashes = set(story_hashes) - set([s["story_hash"] for s in stories])
        if missing_story_hashes:
            missing_stories = MStory.objects(
                story_feed_id=feed.pk, story_hash__in=missing_story_hashes
            ).read_preference(pymongo.ReadPreference.PRIMARY)
            missing_stories = Feed.format_stories(missing_stories, feed.pk)
            stories = missing_stories + stories
            logging.debug(
                u" ---> [%-30s] ~FYFound ~SB~FC%s(of %s)/%s~FY~SN un-secondaried stories while computing scores"
                % (feed.title[:30], len(missing_stories), len(missing_story_hashes), len(stories))
            )
        cache.set("S:%s" % feed.pk, stories, 60)
        logging.debug(
            u" ---> [%-30s] ~FYComputing scores: ~SB%s stories~SN with ~SB%s subscribers ~SN(%s/%s/%s)"
            % (
                feed.title[:30],
                len(stories),
                user_subs.count(),
                feed.num_subscribers,
                feed.active_subscribers,
                feed.premium_subscribers,
            )
        )
        self.calculate_feed_scores_with_stories(user_subs, stories)
    elif self.options.get("mongodb_replication_lag"):
        logging.debug(
            u" ---> [%-30s] ~BR~FYSkipping computing scores: ~SB%s seconds~SN of mongodb lag"
            % (feed.title[:30], self.options.get("mongodb_replication_lag"))
        )
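# A minimal sketch (not part of the codebase) of the score window used by the
# zrangebyscore call above. Story hashes in zF:<feed_id> are assumed to be
# scored by story_date in epoch seconds, so the window [unread_cutoff, now +
# 1 day] selects every hash that could still be unread; the extra day of
# headroom tolerates clock skew and future-dated stories.
import time

def unread_score_window(unread_cutoff):
    # time.mktime is the portable spelling of the strftime("%s") trick above,
    # which is a glibc extension.
    min_score = int(time.mktime(unread_cutoff.timetuple()))
    max_score = int(time.time() + 60 * 60 * 24)
    return min_score, max_score

# e.g. r.zrangebyscore("zF:42", *unread_score_window(cutoff)) under these assumptions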
def more_like_this(request):
    user = get_user(request)
    get_post = getattr(request, request.method)
    order = get_post.get('order', 'newest')
    page = int(get_post.get('page', 1))
    limit = int(get_post.get('limit', 10))
    offset = limit * (page - 1)
    story_hash = get_post.get('story_hash')
    feed_ids = [us.feed_id for us in UserSubscription.objects.filter(user=user)]
    # Note: the subscription-derived feed_ids above is immediately shadowed
    # below, so the search ends up scoped to the story's own feed only.
    feed_ids, _ = MStory.split_story_hash(story_hash)
    story_ids = SearchStory.more_like_this([feed_ids], story_hash, order,
                                           offset=offset, limit=limit)
    stories_db = MStory.objects(story_hash__in=story_ids).order_by(
        '-story_date' if order == "newest" else 'story_date')
    stories = Feed.format_stories(stories_db)

    return {
        "stories": stories,
    }
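# A hedged sketch of the story-hash convention more_like_this() leans on:
# hashes appear to take the form "<feed_id>:<guid_hash>", which is why
# MStory.split_story_hash(story_hash) can recover the owning feed. The helper
# below is illustrative only, not the real implementation.
def split_story_hash_sketch(story_hash):
    feed_id, _, guid_hash = story_hash.partition(':')
    return int(feed_id), guid_hash

# split_story_hash_sketch("42:6a81df") -> (42, '6a81df')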
def count_unreads_for_subscribers(self, feed):
    UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
    user_subs = UserSubscription.objects.filter(feed=feed,
                                                active=True,
                                                user__profile__last_seen_on__gte=UNREAD_CUTOFF)\
                                        .order_by('-last_read_date')
    if not user_subs.count():
        return

    for sub in user_subs:
        if not sub.needs_unread_recalc:
            sub.needs_unread_recalc = True
            sub.save()

    if self.options['compute_scores']:
        stories = MStory.objects(story_feed_id=feed.pk,
                                 story_date__gte=UNREAD_CUTOFF)\
                        .read_preference(pymongo.ReadPreference.PRIMARY)
        stories = Feed.format_stories(stories, feed.pk)
        logging.debug(u' ---> [%-30s] ~FYComputing scores: ~SB%s stories~SN with ~SB%s subscribers ~SN(%s/%s/%s)' % (
            feed.title[:30], len(stories), user_subs.count(),
            feed.num_subscribers, feed.active_subscribers, feed.premium_subscribers))
        self.calculate_feed_scores_with_stories(user_subs, stories)
    elif self.options.get('mongodb_replication_lag'):
        logging.debug(u' ---> [%-30s] ~BR~FYSkipping computing scores: ~SB%s seconds~SN of mongodb lag' % (
            feed.title[:30], self.options.get('mongodb_replication_lag')))
def load_starred_stories(request):
    user = get_user(request)
    offset = int(request.REQUEST.get('offset', 0))
    limit = int(request.REQUEST.get('limit', 10))
    page = int(request.REQUEST.get('page', 0))
    if page:
        offset = limit * (page - 1)

    mstories = MStarredStory.objects(user_id=user.pk).order_by('-starred_date')[offset:offset+limit]
    stories = Feed.format_stories(mstories)

    for story in stories:
        story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(story_date, now)
        story['long_parsed_date'] = format_story_link_date__long(story_date, now)
        starred_date = localtime_for_timezone(story['starred_date'], user.profile.timezone)
        story['starred_date'] = format_story_link_date__long(starred_date, now)
        story['read_status'] = 1
        story['starred'] = True
        story['intelligence'] = {
            'feed': 0,
            'author': 0,
            'tags': 0,
            'title': 0,
        }

    logging.user(request, "~FCLoading starred stories: ~SB%s stories" % (len(stories)))

    return dict(stories=stories)
def story_public_comments(request):
    format = request.REQUEST.get('format', 'json')
    relative_user_id = request.REQUEST.get('user_id', None)
    feed_id = int(request.REQUEST['feed_id'])
    story_id = request.REQUEST['story_id']

    if not relative_user_id:
        relative_user_id = get_user(request).pk

    stories = MSharedStory.objects.filter(story_feed_id=feed_id, story_guid=story_id).limit(1)
    stories = Feed.format_stories(stories)
    stories, profiles = MSharedStory.stories_with_comments_and_profiles(stories, relative_user_id,
                                                                        check_all=True, public=True)

    if format == 'html':
        stories = MSharedStory.attach_users_to_stories(stories, profiles)
        return render_to_response('social/story_comments.xhtml', {
            'story': stories[0],
        }, context_instance=RequestContext(request))
    else:
        return json.json_response(request, {
            'comments': stories[0]['public_comments'],
            'user_profiles': profiles,
        })
def push_feed_notifications(cls, feed_id, new_stories, force=False):
    feed = Feed.get_by_id(feed_id)
    notifications = MUserFeedNotification.users_for_feed(feed.pk)
    logging.debug(
        " ---> [%-30s] ~FCPushing out notifications to ~SB%s users~SN for ~FB~SB%s stories"
        % (feed, len(notifications), new_stories))
    r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)

    latest_story_hashes = r.zrange("zF:%s" % feed.pk, -1 * new_stories, -1)
    mstories = MStory.objects.filter(story_hash__in=latest_story_hashes).order_by('-story_date')
    stories = Feed.format_stories(mstories)
    total_sent_count = 0

    for user_feed_notification in notifications:
        sent_count = 0
        last_notification_date = user_feed_notification.last_notification_date
        try:
            usersub = UserSubscription.objects.get(
                user=user_feed_notification.user_id,
                feed=user_feed_notification.feed_id)
        except UserSubscription.DoesNotExist:
            continue
        classifiers = user_feed_notification.classifiers(usersub)
        if classifiers is None:
            if settings.DEBUG:
                logging.debug("Has no usersubs")
            continue

        for story in stories:
            if sent_count >= 3:
                if settings.DEBUG:
                    logging.debug("Sent too many, ignoring...")
                continue
            if story['story_date'] <= last_notification_date and not force:
                if settings.DEBUG:
                    logging.debug(
                        "Story date older than last notification date: %s <= %s"
                        % (story['story_date'], last_notification_date))
                continue

            if story['story_date'] > user_feed_notification.last_notification_date:
                user_feed_notification.last_notification_date = story['story_date']
                user_feed_notification.save()

            story['story_content'] = html.unescape(story['story_content'])

            sent = user_feed_notification.push_story_notification(story, classifiers, usersub)
            if sent:
                sent_count += 1
                total_sent_count += 1

    return total_sent_count, len(notifications)
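# A small sketch of the zrange call in push_feed_notifications: negative
# indices count from the tail of a sorted set, so (-new_stories, -1) returns
# the new_stories highest-scored (newest) hashes. Assumes redis-py 3.x's
# mapping-style zadd and a local redis; illustrative only.
import redis

r = redis.Redis()
r.zadd("zF:demo", {"1:aaa": 100, "1:bbb": 200, "1:ccc": 300})
assert r.zrange("zF:demo", -2, -1) == [b"1:bbb", b"1:ccc"]  # two newest
r.delete("zF:demo")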
def api_saved_story(request):
    user = request.user
    body = request.body_json
    after = body.get('after', None)
    before = body.get('before', None)
    limit = body.get('limit', 50)
    fields = body.get('triggerFields')
    story_tag = fields['story_tag']
    entries = []

    if story_tag == "all":
        story_tag = ""

    params = dict(user_id=user.pk)
    if story_tag:
        params.update(dict(user_tags__contains=story_tag))
    mstories = MStarredStory.objects(**params).order_by('-starred_date')[:limit]
    stories = Feed.format_stories(mstories)

    found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
    feeds = dict([(f.pk, {
        "title": f.feed_title,
        "website": f.feed_link,
        "address": f.feed_address,
    }) for f in Feed.objects.filter(pk__in=found_feed_ids)])

    for story in stories:
        if before and int(story['story_date'].strftime("%s")) > before:
            continue
        if after and int(story['story_date'].strftime("%s")) < after:
            continue
        feed = feeds.get(story['story_feed_id'], None)
        entries.append({
            "StoryTitle": story['story_title'],
            "StoryContent": story['story_content'],
            "StoryURL": story['story_permalink'],
            "StoryAuthor": story['story_authors'],
            "PublishedAt": story['story_date'].strftime("%Y-%m-%dT%H:%M:%SZ"),
            "SavedAt": story['starred_date'].strftime("%Y-%m-%dT%H:%M:%SZ"),
            "Tags": ', '.join(story['user_tags']),
            "Site": feed and feed['title'],
            "SiteURL": feed and feed['website'],
            "SiteRSS": feed and feed['address'],
            "ifttt": {
                "id": story['story_hash'],
                "timestamp": int(story['starred_date'].strftime("%s"))
            },
        })

    if after:
        entries = sorted(entries, key=lambda s: s['ifttt']['timestamp'])

    logging.user(request, "~FCChecking saved stories from ~SBIFTTT~SB: ~SB%s~SN - ~SB%s~SN stories" % (
        story_tag if story_tag else "[All stories]", len(entries)))

    return {"data": entries}
def api_saved_story(request):
    user = request.user
    body = request.body_json
    after = body.get('after', None)
    before = body.get('before', None)
    limit = body.get('limit', 50)
    fields = body.get('triggerFields')
    story_tag = fields['story_tag']
    entries = []

    if story_tag == "all":
        story_tag = ""

    params = dict(user_id=user.pk)
    if story_tag:
        params.update(dict(user_tags__contains=story_tag))
    mstories = MStarredStory.objects(**params).order_by('-starred_date')[:limit]
    stories = Feed.format_stories(mstories)

    found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
    feeds = dict([(f.pk, {
        "title": f.feed_title,
        "website": f.feed_link,
        "address": f.feed_address,
    }) for f in Feed.objects.filter(pk__in=found_feed_ids)])

    for story in stories:
        if before and int(story['story_date'].strftime("%s")) > before:
            continue
        if after and int(story['story_date'].strftime("%s")) < after:
            continue
        feed = feeds.get(story['story_feed_id'], None)
        entries.append({
            "StoryTitle": story['story_title'],
            "StoryContent": story['story_content'],
            "StoryURL": story['story_permalink'],
            "StoryAuthor": story['story_authors'],
            "PublishedAt": story['story_date'].strftime("%Y-%m-%dT%H:%M:%SZ"),
            "SavedAt": story['starred_date'].strftime("%Y-%m-%dT%H:%M:%SZ"),
            "Tags": ', '.join(story['user_tags']),
            "Site": feed and feed['title'],
            "SiteURL": feed and feed['website'],
            "SiteRSS": feed and feed['address'],
            "meta": {
                "id": story['story_hash'],
                "timestamp": int(story['starred_date'].strftime("%s"))
            },
        })

    if after:
        entries = sorted(entries, key=lambda s: s['meta']['timestamp'])

    logging.user(request, "~FCChecking saved stories from ~SBIFTTT~SB: ~SB%s~SN - ~SB%s~SN stories" % (
        story_tag if story_tag else "[All stories]", len(entries)))

    return {"data": entries}
def api_saved_story(request):
    user = request.user
    body = json.decode(request.body)
    after = body.get('after', None)
    before = body.get('before', None)
    limit = body.get('limit', 50)
    fields = body.get('triggerFields')
    story_tag = fields['story_tag']
    entries = []

    if story_tag == "all":
        story_tag = ""

    mstories = MStarredStory.objects(
        user_id=user.pk,
        user_tags__contains=story_tag
    ).order_by('-starred_date')[:limit]
    stories = Feed.format_stories(mstories)

    found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
    feeds = dict([(f.pk, {
        "title": f.feed_title,
        "website": f.feed_link,
        "address": f.feed_address,
    }) for f in Feed.objects.filter(pk__in=found_feed_ids)])

    for story in stories:
        if before and int(story['story_date'].strftime("%s")) > before:
            continue
        if after and int(story['story_date'].strftime("%s")) < after:
            continue
        feed = feeds.get(story['story_feed_id'], None)
        entries.append({
            "StoryTitle": story['story_title'],
            "StoryContent": story['story_content'],
            "StoryUrl": story['story_permalink'],
            "StoryAuthor": story['story_authors'],
            "StoryDate": story['story_date'].isoformat(),
            "SavedDate": story['starred_date'].isoformat(),
            "SavedTags": ', '.join(story['user_tags']),
            "SiteTitle": feed and feed['title'],
            "SiteWebsite": feed and feed['website'],
            "SiteFeedAddress": feed and feed['address'],
            "ifttt": {
                "id": story['story_hash'],
                "timestamp": int(story['starred_date'].strftime("%s"))
            },
        })

    logging.user(request, "~FCChecking saved stories from ~SBIFTTT~SB: ~SB%s~SN - ~SB%s~SN stories" % (
        story_tag if story_tag else "[All stories]", len(entries)))

    return {"data": entries}
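# The api_saved_story variants above all window stories the same way: IFTTT
# sends `after`/`before` as unix timestamps, and each story's date is
# compared in epoch seconds. A distilled, illustrative predicate (note that
# strftime("%s") is a glibc extension; calendar.timegm is the portable form):
def within_window(story_epoch, after=None, before=None):
    if before is not None and story_epoch > before:
        return False  # newer than the window
    if after is not None and story_epoch < after:
        return False  # older than the window
    return True

# within_window(1500, after=1000, before=2000) -> True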
def push_feed_notifications(cls, feed_id, new_stories, force=False):
    feed = Feed.get_by_id(feed_id)
    notifications = MUserFeedNotification.users_for_feed(feed.pk)
    logging.debug(" ---> [%-30s] ~FCPushing out notifications to ~SB%s users~SN for ~FB~SB%s stories" % (
        feed, len(notifications), new_stories))
    r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)

    latest_story_hashes = r.zrange("zF:%s" % feed.pk, -1 * new_stories, -1)
    mstories = MStory.objects.filter(story_hash__in=latest_story_hashes).order_by('-story_date')
    stories = Feed.format_stories(mstories)
    total_sent_count = 0

    for user_feed_notification in notifications:
        sent_count = 0
        last_notification_date = user_feed_notification.last_notification_date
        try:
            usersub = UserSubscription.objects.get(user=user_feed_notification.user_id,
                                                   feed=user_feed_notification.feed_id)
        except UserSubscription.DoesNotExist:
            continue
        classifiers = user_feed_notification.classifiers(usersub)
        if classifiers is None:
            logging.debug("Has no usersubs")
            continue

        for story in stories:
            if sent_count >= 3:
                logging.debug("Sent too many, ignoring...")
                continue
            if story['story_date'] <= last_notification_date and not force:
                logging.debug("Story date older than last notification date: %s <= %s" % (
                    story['story_date'], last_notification_date))
                continue

            if story['story_date'] > user_feed_notification.last_notification_date:
                user_feed_notification.last_notification_date = story['story_date']
                user_feed_notification.save()

            story['story_content'] = HTMLParser().unescape(story['story_content'])

            sent = user_feed_notification.push_story_notification(story, classifiers, usersub)
            if sent:
                sent_count += 1
                total_sent_count += 1

    return total_sent_count, len(notifications)
def load_river_stories(request):
    user = get_user(request)
    feed_ids = [int(feed_id) for feed_id in request.POST.getlist("feeds")]
    offset = int(request.REQUEST.get("offset", 0))
    limit = int(request.REQUEST.get("limit", 25))
    page = int(request.REQUEST.get("page", 0)) + 1
    read_stories = int(request.REQUEST.get("read_stories", 0))
    # if page: offset = limit * page
    if page:
        limit = limit * page - read_stories

    def feed_qvalues(feed_id):
        feed = UserSubscription.objects.get(feed__pk=feed_id, user=user)
        return Q(story_feed_id=feed_id) & Q(story_date__gte=feed.mark_read_date)

    feed_last_reads = map(feed_qvalues, feed_ids)
    qs = reduce(lambda q1, q2: q1 | q2, feed_last_reads)
    # `read_stories` is reused here: the count param above has already been
    # consumed in the limit calculation, and the name now holds read story ids.
    read_stories = MUserStory.objects(user_id=user.pk, feed_id__in=feed_ids).only("story")
    read_stories = [rs.story.id for rs in read_stories]

    mstories = MStory.objects(Q(id__nin=read_stories) & qs)[offset : offset + limit]
    stories = Feed.format_stories(mstories)

    starred_stories = MStarredStory.objects(user_id=user.pk, story_feed_id__in=feed_ids).only(
        "story_guid", "starred_date"
    )
    starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])

    for story in stories:
        story_date = localtime_for_timezone(story["story_date"], user.profile.timezone)
        story["short_parsed_date"] = format_story_link_date__short(story_date)
        story["long_parsed_date"] = format_story_link_date__long(story_date)
        story["read_status"] = 0
        if story["id"] in starred_stories:
            story["starred"] = True
            starred_date = localtime_for_timezone(starred_stories[story["id"]], user.profile.timezone)
            story["starred_date"] = format_story_link_date__long(starred_date)
        story["intelligence"] = {"feed": 0, "author": 0, "tags": 0, "title": 0}

    logging.info(
        " ---> [%s] ~FCLoading river stories: ~SB%s stories ~SN(%s feeds)"
        % (request.user, len(stories), len(feed_ids))
    )

    return dict(stories=stories)
def load_starred_stories(request):
    user = get_user(request)
    offset = int(request.REQUEST.get("offset", 0))
    limit = int(request.REQUEST.get("limit", 10))
    page = int(request.REQUEST.get("page", 0))
    if page:
        offset = limit * page

    mstories = MStarredStory.objects(user_id=user.pk).order_by("-starred_date")[offset : offset + limit]
    stories = Feed.format_stories(mstories)

    for story in stories:
        story_date = localtime_for_timezone(story["story_date"], user.profile.timezone)
        story["short_parsed_date"] = format_story_link_date__short(story_date)
        story["long_parsed_date"] = format_story_link_date__long(story_date)
        starred_date = localtime_for_timezone(story["starred_date"], user.profile.timezone)
        story["starred_date"] = format_story_link_date__long(starred_date)
        story["read_status"] = 1
        story["starred"] = True
        story["intelligence"] = {"feed": 0, "author": 0, "tags": 0, "title": 0}

    logging.info(" ---> [%s] ~FCLoading starred stories: ~SB%s stories" % (request.user, len(stories)))

    return dict(stories=stories)
def get_stories(self, offset=0, limit=6, order='newest', read_filter='all', withscores=False):
    r = redis.Redis(connection_pool=settings.REDIS_STORY_POOL)
    ignore_user_stories = False

    stories_key = 'F:%s' % (self.feed_id)
    read_stories_key = 'RS:%s:%s' % (self.user_id, self.feed_id)
    unread_stories_key = 'U:%s:%s' % (self.user_id, self.feed_id)

    unread_ranked_stories_key = 'zU:%s:%s' % (self.user_id, self.feed_id)
    if offset and not withscores and r.exists(unread_ranked_stories_key):
        pass
    else:
        r.delete(unread_ranked_stories_key)
        if not r.exists(stories_key):
            print " ---> No stories on feed: %s" % self
            return []
        elif read_filter != 'unread' or not r.exists(read_stories_key):
            ignore_user_stories = True
            unread_stories_key = stories_key
        else:
            r.sdiffstore(unread_stories_key, stories_key, read_stories_key)
        sorted_stories_key = 'zF:%s' % (self.feed_id)
        unread_ranked_stories_key = 'zU:%s:%s' % (self.user_id, self.feed_id)
        r.zinterstore(unread_ranked_stories_key, [sorted_stories_key, unread_stories_key])

    current_time = int(time.time() + 60*60*24)
    if order == 'oldest':
        byscorefunc = r.zrangebyscore
        # NOTE: the `or True` forces the unread branch; the else below is dead code.
        if read_filter == 'unread' or True:
            min_score = int(time.mktime(self.mark_read_date.timetuple())) + 1
        else:
            now = datetime.datetime.now()
            two_weeks_ago = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
            min_score = int(time.mktime(two_weeks_ago.timetuple())) - 1000
        max_score = current_time
    else:
        byscorefunc = r.zrevrangebyscore
        min_score = current_time
        if read_filter == 'unread':
            # +1 for the intersection b/w zF and F, which carries an implicit score of 1.
            max_score = int(time.mktime(self.mark_read_date.timetuple())) + 1
        else:
            max_score = 0

    if settings.DEBUG:
        debug_stories = r.zrevrange(unread_ranked_stories_key, 0, -1, withscores=True)
        print " ---> Unread all stories (%s - %s) %s stories: %s" % (
            min_score, max_score, len(debug_stories), debug_stories)

    story_ids = byscorefunc(unread_ranked_stories_key, min_score, max_score,
                            start=offset, num=500, withscores=withscores)[:limit]
    r.expire(unread_ranked_stories_key, 24*60*60)
    if not ignore_user_stories:
        r.delete(unread_stories_key)

    # XXX TODO: Remove below line after combing redis for these None's.
    story_ids = [s for s in story_ids if s and s != 'None']  # ugh, hack

    if withscores:
        return story_ids
    elif story_ids:
        story_date_order = "%sstory_date" % ('' if order == 'oldest' else '-')
        mstories = MStory.objects(id__in=story_ids).order_by(story_date_order)
        stories = Feed.format_stories(mstories)
        return stories
    else:
        return []
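# A self-contained sketch of the unread pipeline in get_stories above,
# assuming a local redis and redis-py 3.x. F:<feed> holds every story hash,
# RS:<user>:<feed> the read ones; their set difference, intersected with the
# date-scored zF:<feed>, yields unreads ranked by story_date. zinterstore's
# default SUM aggregate adds the plain set's implicit score of 1 to each
# date, which is where the "+1" in the bounds above comes from.
import redis

r = redis.Redis()
r.sadd("F:1", "1:aaa", "1:bbb", "1:ccc")
r.sadd("RS:9:1", "1:bbb")  # user 9 has read 1:bbb
r.zadd("zF:1", {"1:aaa": 100, "1:bbb": 200, "1:ccc": 300})
r.sdiffstore("U:9:1", ["F:1", "RS:9:1"])    # unread = all - read
r.zinterstore("zU:9:1", ["zF:1", "U:9:1"])  # keep date ordering (+1 each)
assert r.zrange("zU:9:1", 0, -1) == [b"1:aaa", b"1:ccc"]
r.delete("F:1", "RS:9:1", "zF:1", "U:9:1", "zU:9:1")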
def calculate_feed_scores(self, silent=False, stories=None, force=False):
    # now = datetime.datetime.strptime("2009-07-06 22:30:03", "%Y-%m-%d %H:%M:%S")
    now = datetime.datetime.now()
    UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

    if self.user.profile.last_seen_on < UNREAD_CUTOFF and not force:
        # if not silent:
        #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
        return
    # if not self.feed.fetched_once:
    #     if not silent:
    #         logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
    #     self.needs_unread_recalc = False
    #     self.save()
    #     return

    feed_scores = dict(negative=0, neutral=0, positive=0)

    # Two weeks in age. If mark_read_date is older, mark old stories as read.
    date_delta = UNREAD_CUTOFF
    if date_delta < self.mark_read_date:
        date_delta = self.mark_read_date
    else:
        self.mark_read_date = date_delta

    if not stories:
        stories = cache.get('S:%s' % self.feed_id)

    unread_story_hashes = self.get_stories(read_filter='unread', limit=500, hashes_only=True)

    if not stories:
        stories_db = MStory.objects(story_hash__in=unread_story_hashes)
        stories = Feed.format_stories(stories_db, self.feed_id)

    oldest_unread_story_date = now
    unread_stories = []
    for story in stories:
        if story['story_date'] < date_delta:
            continue
        if story['story_hash'] in unread_story_hashes:
            unread_stories.append(story)
            if story['story_date'] < oldest_unread_story_date:
                oldest_unread_story_date = story['story_date']

    # if not silent:
    #     logging.info(' ---> [%s] Format stories: %s' % (self.user, datetime.datetime.now() - now))

    classifier_feeds = list(MClassifierFeed.objects(user_id=self.user_id, feed_id=self.feed_id, social_user_id=0))
    classifier_authors = list(MClassifierAuthor.objects(user_id=self.user_id, feed_id=self.feed_id))
    classifier_titles = list(MClassifierTitle.objects(user_id=self.user_id, feed_id=self.feed_id))
    classifier_tags = list(MClassifierTag.objects(user_id=self.user_id, feed_id=self.feed_id))

    # if not silent:
    #     logging.info(' ---> [%s] Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))

    scores = {
        'feed': apply_classifier_feeds(classifier_feeds, self.feed),
    }

    for story in unread_stories:
        scores.update({
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        })

        max_score = max(scores['author'], scores['tags'], scores['title'])
        min_score = min(scores['author'], scores['tags'], scores['title'])
        if max_score > 0:
            feed_scores['positive'] += 1
        elif min_score < 0:
            feed_scores['negative'] += 1
        else:
            if scores['feed'] > 0:
                feed_scores['positive'] += 1
            elif scores['feed'] < 0:
                feed_scores['negative'] += 1
            else:
                feed_scores['neutral'] += 1

    # if not silent:
    #     logging.info(' ---> [%s] End classifiers: %s' % (self.user, datetime.datetime.now() - now))

    self.unread_count_positive = feed_scores['positive']
    self.unread_count_neutral = feed_scores['neutral']
    self.unread_count_negative = feed_scores['negative']
    self.unread_count_updated = datetime.datetime.now()
    self.oldest_unread_story_date = oldest_unread_story_date
    self.needs_unread_recalc = False

    self.save()

    if (self.unread_count_positive == 0 and
        self.unread_count_neutral == 0):
        self.mark_feed_read()

    if not silent:
        logging.user(self.user, '~FC~SNComputing scores: %s (~SB%s~SN/~SB%s~SN/~SB%s~SN)' % (
            self.feed, feed_scores['negative'], feed_scores['neutral'], feed_scores['positive']))

    return self
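# A pure-function distillation of the bucketing loop above, for clarity (not
# part of the codebase): a story counts as positive if any story-level
# classifier (author/tags/title) scores above zero, negative if any scores
# below zero and none above, and otherwise falls back to the feed-level score.
def bucket_story(feed_score, author, tags, title):
    max_score = max(author, tags, title)
    min_score = min(author, tags, title)
    if max_score > 0:
        return 'positive'
    if min_score < 0:
        return 'negative'
    if feed_score > 0:
        return 'positive'
    if feed_score < 0:
        return 'negative'
    return 'neutral'

# bucket_story(feed_score=1, author=0, tags=-1, title=0) -> 'negative'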
def load_river_stories(request):
    limit = 18
    offset = 0
    start = datetime.datetime.utcnow()
    user = get_user(request)
    feed_ids = [int(feed_id) for feed_id in request.REQUEST.getlist('feeds') if feed_id]
    original_feed_ids = list(feed_ids)
    page = int(request.REQUEST.get('page', 0)) + 1
    read_stories_count = int(request.REQUEST.get('read_stories_count', 0))
    bottom_delta = datetime.timedelta(days=settings.DAYS_OF_UNREAD)

    if not feed_ids:
        logging.user(request.user, "~FCLoading empty river stories: page %s" % (page))
        return dict(stories=[])

    # Fetch all stories at and before the page number.
    # Not a single page, because reading stories can move them up in the unread order.
    # `read_stories_count` is an optimization, works best when all 25 stories before have been read.
    limit = limit * page - read_stories_count

    # Read stories to exclude
    read_stories = MUserStory.objects(user_id=user.pk, feed_id__in=feed_ids).only('story')
    read_stories = [rs.story.id for rs in read_stories]

    # Determine mark_as_read dates for all feeds to ignore all stories before this date.
    # max_feed_count = 0
    feed_counts = {}
    feed_last_reads = {}
    for feed_id in feed_ids:
        try:
            usersub = UserSubscription.objects.get(feed__pk=feed_id, user=user)
        except UserSubscription.DoesNotExist:
            continue
        if not usersub:
            continue
        feed_counts[feed_id] = (usersub.unread_count_negative * 1 +
                                usersub.unread_count_neutral * 10 +
                                usersub.unread_count_positive * 20)
        # if feed_counts[feed_id] > max_feed_count:
        #     max_feed_count = feed_counts[feed_id]
        feed_last_reads[feed_id] = int(time.mktime(usersub.mark_read_date.timetuple()))
    feed_counts = sorted(feed_counts.items(), key=itemgetter(1))[:50]
    feed_ids = [f[0] for f in feed_counts]
    feed_last_reads = dict([(str(feed_id), feed_last_reads[feed_id]) for feed_id in feed_ids])
    feed_counts = dict(feed_counts)

    # After excluding read stories, all that's left are stories
    # past the mark_read_date. Everything returned is guaranteed to be unread.
    mstories = MStory.objects(
        id__nin=read_stories,
        story_feed_id__in=feed_ids,
        story_date__gte=start - bottom_delta
    ).map_reduce("""function() {
            var d = feed_last_reads[this[~story_feed_id]];
            if (this[~story_date].getTime()/1000 > d) {
                emit(this[~id], this);
            }
        }""",
        """function(key, values) {
            return values[0];
        }""",
        output='inline',
        scope={
            'feed_last_reads': feed_last_reads
        }
    )
    mstories = [story.value for story in mstories]

    mstories = sorted(mstories, cmp=lambda x, y: cmp(story_score(y, bottom_delta), story_score(x, bottom_delta)))

    # story_feed_counts = defaultdict(int)
    # mstories_pruned = []
    # for story in mstories:
    #     print story['story_title'], story_feed_counts[story['story_feed_id']]
    #     if story_feed_counts[story['story_feed_id']] >= 3: continue
    #     mstories_pruned.append(story)
    #     story_feed_counts[story['story_feed_id']] += 1

    stories = []
    for i, story in enumerate(mstories):
        if i < offset: continue
        if i >= offset + limit: break
        stories.append(bunch(story))
    stories = Feed.format_stories(stories)
    found_feed_ids = list(set([story['story_feed_id'] for story in stories]))

    # Find starred stories
    starred_stories = MStarredStory.objects(
        user_id=user.pk,
        story_feed_id__in=found_feed_ids
    ).only('story_guid', 'starred_date')
    starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])

    # Intelligence classifiers for all feeds involved
    def sort_by_feed(classifiers):
        feed_classifiers = defaultdict(list)
        for classifier in classifiers:
            feed_classifiers[classifier.feed_id].append(classifier)
        return feed_classifiers
    classifier_feeds = sort_by_feed(MClassifierFeed.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_authors = sort_by_feed(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_titles = sort_by_feed(MClassifierTitle.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_tags = sort_by_feed(MClassifierTag.objects(user_id=user.pk, feed_id__in=found_feed_ids))

    # Just need to format stories
    for story in stories:
        story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(story_date, now)
        story['long_parsed_date'] = format_story_link_date__long(story_date, now)
        story['read_status'] = 0
        if story['id'] in starred_stories:
            story['starred'] = True
            starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
            story['starred_date'] = format_story_link_date__long(starred_date, now)
        story['intelligence'] = {
            'feed': apply_classifier_feeds(classifier_feeds[story['story_feed_id']], story['story_feed_id']),
            'author': apply_classifier_authors(classifier_authors[story['story_feed_id']], story),
            'tags': apply_classifier_tags(classifier_tags[story['story_feed_id']], story),
            'title': apply_classifier_titles(classifier_titles[story['story_feed_id']], story),
        }

    diff = datetime.datetime.utcnow() - start
    timediff = float("%s.%.2s" % (diff.seconds, (diff.microseconds / 1000)))
    logging.user(request.user, "~FCLoading river stories: page %s - ~SB%s/%s "
                               "stories ~SN(%s/%s/%s feeds) ~FB(%s seconds)" %
                               (page, len(stories), len(mstories), len(found_feed_ids),
                                len(feed_ids), len(original_feed_ids), timediff))

    return dict(stories=stories)
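# A hedged Python rendering of the map/reduce filter above, for readability:
# a story survives only if its date (in epoch seconds) is newer than its
# feed's last mark-read time. Illustrative only; the real filtering runs
# inside MongoDB via the JavaScript map function.
import time

def newer_than_mark_read(stories, feed_last_reads):
    for story in stories:
        last_read = feed_last_reads.get(str(story['story_feed_id']), 0)
        if time.mktime(story['story_date'].timetuple()) > last_read:
            yield story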
def load_social_page(request, user_id, username=None, **kwargs):
    start = time.time()
    user = request.user
    social_user_id = int(user_id)
    social_user = get_object_or_404(User, pk=social_user_id)
    offset = int(request.REQUEST.get("offset", 0))
    limit = int(request.REQUEST.get("limit", 6))
    page = request.REQUEST.get("page")
    format = request.REQUEST.get("format", None)
    has_next_page = False
    feed_id = kwargs.get("feed_id") or request.REQUEST.get("feed_id")
    if page:
        offset = limit * (int(page) - 1)

    user_social_profile = None
    user_social_services = None
    if user.is_authenticated():
        user_social_profile = MSocialProfile.get_user(user.pk)
        user_social_services = MSocialServices.get_user(user.pk)
    social_profile = MSocialProfile.get_user(social_user_id)

    params = dict(user_id=social_user.pk)
    if feed_id:
        params["story_feed_id"] = feed_id
    mstories = MSharedStory.objects(**params).order_by("-shared_date")[offset : offset + limit + 1]
    stories = Feed.format_stories(mstories)
    if len(stories) > limit:
        has_next_page = True
        stories = stories[:-1]

    checkpoint1 = time.time()

    if not stories:
        params = {
            "user": user,
            "stories": [],
            "feeds": {},
            "social_user": social_user,
            "social_profile": social_profile,
            "user_social_services": user_social_services,
            "user_social_profile": json.encode(user_social_profile and user_social_profile.page()),
        }
        template = "social/social_page.xhtml"
        return render_to_response(template, params, context_instance=RequestContext(request))

    story_feed_ids = list(set(s["story_feed_id"] for s in stories))
    feeds = Feed.objects.filter(pk__in=story_feed_ids)
    feeds = dict((feed.pk, feed.canonical(include_favicon=False)) for feed in feeds)

    for story in stories:
        if story["story_feed_id"] in feeds:
            # Feed could have been deleted.
            story["feed"] = feeds[story["story_feed_id"]]
        shared_date = localtime_for_timezone(story["shared_date"], social_user.profile.timezone)
        story["shared_date"] = shared_date

    stories, profiles = MSharedStory.stories_with_comments_and_profiles(stories, social_user.pk, check_all=True)

    checkpoint2 = time.time()

    if user.is_authenticated():
        for story in stories:
            if user.pk in story["share_user_ids"]:
                story["shared_by_user"] = True
                shared_story = MSharedStory.objects.get(
                    user_id=user.pk, story_feed_id=story["story_feed_id"], story_guid=story["id"]
                )
                story["user_comments"] = shared_story.comments

    stories = MSharedStory.attach_users_to_stories(stories, profiles)

    params = {
        "social_user": social_user,
        "stories": stories,
        "user_social_profile": user_social_profile,
        "user_social_profile_page": json.encode(user_social_profile and user_social_profile.page()),
        "user_social_services": user_social_services,
        "user_social_services_page": json.encode(user_social_services and user_social_services.to_json()),
        "social_profile": social_profile,
        "feeds": feeds,
        "user_profile": hasattr(user, "profile") and user.profile,
        "has_next_page": has_next_page,
        "holzer_truism": random.choice(jennyholzer.TRUISMS),  # if not has_next_page else None
    }

    diff1 = checkpoint1 - start
    diff2 = checkpoint2 - start
    timediff = time.time() - start
    logging.user(
        request,
        "~FYLoading ~FMsocial page~FY: ~SB%s%s ~SN(%.4s seconds, ~SB%.4s/%.4s~SN)"
        % (social_profile.title[:22], ("~SN/p%s" % page) if page > 1 else "", timediff, diff1, diff2),
    )

    if format == "html":
        template = "social/social_stories.xhtml"
    else:
        template = "social/social_page.xhtml"

    return render_to_response(template, params, context_instance=RequestContext(request))
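# A distilled sketch of the pagination trick in load_social_page: fetch one
# row beyond the page size, and its mere presence proves a next page exists.
# Illustrative helper, not part of the codebase.
def paginate(qs, page, limit):
    offset = limit * (page - 1)
    rows = list(qs[offset:offset + limit + 1])
    has_next_page = len(rows) > limit
    return rows[:limit], has_next_page

# paginate(range(13), page=2, limit=6) -> ([6, 7, 8, 9, 10, 11], True)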
def calculate_feed_scores(self, silent=False, stories_db=None):
    now = datetime.datetime.utcnow()
    UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

    if self.user.profile.last_seen_on < UNREAD_CUTOFF:
        # if not silent:
        #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
        return

    if not self.feed.fetched_once:
        if not silent:
            logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
        self.needs_unread_recalc = False
        self.save()
        return

    if not silent:
        logging.info(' ---> [%s] Computing scores: %s' % (self.user, self.feed))

    feed_scores = dict(negative=0, neutral=0, positive=0)

    # Two weeks in age. If mark_read_date is older, mark old stories as read.
    date_delta = UNREAD_CUTOFF
    if date_delta < self.mark_read_date:
        date_delta = self.mark_read_date
    else:
        self.mark_read_date = date_delta

    read_stories = MUserStory.objects(user_id=self.user.pk,
                                      feed_id=self.feed.pk,
                                      read_date__gte=self.mark_read_date)
    # if not silent:
    #     logging.info(' ---> [%s] Read stories: %s' % (self.user, datetime.datetime.now() - now))
    read_stories_ids = []
    for us in read_stories:
        if hasattr(us.story, 'story_guid') and isinstance(us.story.story_guid, unicode):
            read_stories_ids.append(us.story.story_guid)
        elif hasattr(us.story, 'id') and isinstance(us.story.id, unicode):
            read_stories_ids.append(us.story.id)  # TODO: Remove me after migration from story.id->guid
    stories_db = stories_db or MStory.objects(story_feed_id=self.feed.pk,
                                              story_date__gte=date_delta)
    # if not silent:
    #     logging.info(' ---> [%s] MStory: %s' % (self.user, datetime.datetime.now() - now))
    oldest_unread_story_date = now
    unread_stories_db = []
    for story in stories_db:
        if story.story_date < date_delta:
            continue
        if hasattr(story, 'story_guid') and story.story_guid not in read_stories_ids:
            unread_stories_db.append(story)
            if story.story_date < oldest_unread_story_date:
                oldest_unread_story_date = story.story_date
    stories = Feed.format_stories(unread_stories_db, self.feed.pk)
    # if not silent:
    #     logging.info(' ---> [%s] Format stories: %s' % (self.user, datetime.datetime.now() - now))

    classifier_feeds = MClassifierFeed.objects(user_id=self.user.pk, feed_id=self.feed.pk)
    classifier_authors = MClassifierAuthor.objects(user_id=self.user.pk, feed_id=self.feed.pk)
    classifier_titles = MClassifierTitle.objects(user_id=self.user.pk, feed_id=self.feed.pk)
    classifier_tags = MClassifierTag.objects(user_id=self.user.pk, feed_id=self.feed.pk)

    # if not silent:
    #     logging.info(' ---> [%s] Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))

    scores = {
        'feed': apply_classifier_feeds(classifier_feeds, self.feed),
    }

    for story in stories:
        classifier_authors.rewind()
        classifier_tags.rewind()
        classifier_titles.rewind()
        scores.update({
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        })

        max_score = max(scores['author'], scores['tags'], scores['title'])
        min_score = min(scores['author'], scores['tags'], scores['title'])
        if max_score > 0:
            feed_scores['positive'] += 1
        elif min_score < 0:
            feed_scores['negative'] += 1
        else:
            if scores['feed'] > 0:
                feed_scores['positive'] += 1
            elif scores['feed'] < 0:
                feed_scores['negative'] += 1
            else:
                feed_scores['neutral'] += 1

    # if not silent:
    #     logging.info(' ---> [%s] End classifiers: %s' % (self.user, datetime.datetime.now() - now))

    self.unread_count_positive = feed_scores['positive']
    self.unread_count_neutral = feed_scores['neutral']
    self.unread_count_negative = feed_scores['negative']
    self.unread_count_updated = datetime.datetime.now()
    self.oldest_unread_story_date = oldest_unread_story_date
    self.needs_unread_recalc = False

    self.save()

    if (self.unread_count_positive == 0 and
        self.unread_count_neutral == 0):
        self.mark_feed_read()

    cache.delete('usersub:%s' % self.user.id)

    return
def api_shared_story(request):
    user = request.user
    body = request.body_json
    after = body.get('after', None)
    before = body.get('before', None)
    limit = body.get('limit', 50)
    fields = body.get('triggerFields')
    blurblog_user = fields['blurblog_user']
    entries = []

    if isinstance(blurblog_user, int) or blurblog_user.isdigit():
        social_user_ids = [int(blurblog_user)]
    elif blurblog_user == "all":
        socialsubs = MSocialSubscription.objects.filter(user_id=user.pk)
        social_user_ids = [ss.subscription_user_id for ss in socialsubs]

    mstories = MSharedStory.objects(
        user_id__in=social_user_ids
    ).order_by('-shared_date')[:limit]
    stories = Feed.format_stories(mstories)

    found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
    share_user_ids = list(set([story['user_id'] for story in stories]))
    users = dict([(u.pk, u.username)
                  for u in User.objects.filter(pk__in=share_user_ids).only('pk', 'username')])
    feeds = dict([(f.pk, {
        "title": f.feed_title,
        "website": f.feed_link,
        "address": f.feed_address,
    }) for f in Feed.objects.filter(pk__in=found_feed_ids)])

    classifier_feeds = list(MClassifierFeed.objects(user_id=user.pk,
                                                    social_user_id__in=social_user_ids))
    classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk,
                                                        social_user_id__in=social_user_ids))
    classifier_titles = list(MClassifierTitle.objects(user_id=user.pk,
                                                      social_user_id__in=social_user_ids))
    classifier_tags = list(MClassifierTag.objects(user_id=user.pk,
                                                  social_user_id__in=social_user_ids))
    # Merge with feed specific classifiers
    classifier_feeds = classifier_feeds + list(MClassifierFeed.objects(user_id=user.pk,
                                                                       feed_id__in=found_feed_ids))
    classifier_authors = classifier_authors + list(MClassifierAuthor.objects(user_id=user.pk,
                                                                             feed_id__in=found_feed_ids))
    classifier_titles = classifier_titles + list(MClassifierTitle.objects(user_id=user.pk,
                                                                          feed_id__in=found_feed_ids))
    classifier_tags = classifier_tags + list(MClassifierTag.objects(user_id=user.pk,
                                                                    feed_id__in=found_feed_ids))

    for story in stories:
        if before and int(story['shared_date'].strftime("%s")) > before:
            continue
        if after and int(story['shared_date'].strftime("%s")) < after:
            continue
        score = compute_story_score(story, classifier_titles=classifier_titles,
                                    classifier_authors=classifier_authors,
                                    classifier_tags=classifier_tags,
                                    classifier_feeds=classifier_feeds)
        if score < 0:
            continue
        feed = feeds.get(story['story_feed_id'], None)
        entries.append({
            "StoryTitle": story['story_title'],
            "StoryContent": story['story_content'],
            "StoryURL": story['story_permalink'],
            "StoryAuthor": story['story_authors'],
            "PublishedAt": story['story_date'].strftime("%Y-%m-%dT%H:%M:%SZ"),
            "StoryScore": score,
            "Comments": story['comments'],
            "Username": users.get(story['user_id']),
            "SharedAt": story['shared_date'].strftime("%Y-%m-%dT%H:%M:%SZ"),
            "Site": feed and feed['title'],
            "SiteURL": feed and feed['website'],
            "SiteRSS": feed and feed['address'],
            "ifttt": {
                "id": story['story_hash'],
                "timestamp": int(story['shared_date'].strftime("%s"))
            },
        })

    if after:
        entries = sorted(entries, key=lambda s: s['ifttt']['timestamp'])

    logging.user(request, "~FMChecking shared stories from ~SB~FCIFTTT~SN~FM: ~SB~FM%s~FM~SN - ~SB%s~SN stories" % (
        blurblog_user, len(entries)))

    return {"data": entries}
def api_unread_story(request, trigger_slug=None):
    user = request.user
    body = request.body_json
    after = body.get('after', None)
    before = body.get('before', None)
    limit = body.get('limit', 50)
    fields = body.get('triggerFields')
    feed_or_folder = fields['feed_or_folder']
    entries = []

    if isinstance(feed_or_folder, int) or feed_or_folder.isdigit():
        feed_id = int(feed_or_folder)
        usersub = UserSubscription.objects.get(user=user, feed_id=feed_id)
        found_feed_ids = [feed_id]
        found_trained_feed_ids = [feed_id] if usersub.is_trained else []
        stories = usersub.get_stories(order="newest", read_filter="unread",
                                      offset=0, limit=limit,
                                      default_cutoff_date=user.profile.unread_cutoff)
    else:
        folder_title = feed_or_folder
        if folder_title == "Top Level":
            folder_title = " "
        usf = UserSubscriptionFolders.objects.get(user=user)
        flat_folders = usf.flatten_folders()
        feed_ids = None
        if folder_title != "all":
            feed_ids = flat_folders.get(folder_title)
        usersubs = UserSubscription.subs_for_feeds(user.pk, feed_ids=feed_ids,
                                                   read_filter="unread")
        feed_ids = [sub.feed_id for sub in usersubs]
        params = {
            "user_id": user.pk,
            "feed_ids": feed_ids,
            "offset": 0,
            "limit": limit,
            "order": "newest",
            "read_filter": "unread",
            "usersubs": usersubs,
            "cutoff_date": user.profile.unread_cutoff,
        }
        story_hashes, unread_feed_story_hashes = UserSubscription.feed_stories(**params)
        mstories = MStory.objects(story_hash__in=story_hashes).order_by('-story_date')
        stories = Feed.format_stories(mstories)
        found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
        trained_feed_ids = [sub.feed_id for sub in usersubs if sub.is_trained]
        found_trained_feed_ids = list(set(trained_feed_ids) & set(found_feed_ids))

    if found_trained_feed_ids:
        classifier_feeds = list(MClassifierFeed.objects(user_id=user.pk,
                                                        feed_id__in=found_trained_feed_ids))
        classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk,
                                                            feed_id__in=found_trained_feed_ids))
        classifier_titles = list(MClassifierTitle.objects(user_id=user.pk,
                                                          feed_id__in=found_trained_feed_ids))
        classifier_tags = list(MClassifierTag.objects(user_id=user.pk,
                                                      feed_id__in=found_trained_feed_ids))

    feeds = dict([(f.pk, {
        "title": f.feed_title,
        "website": f.feed_link,
        "address": f.feed_address,
    }) for f in Feed.objects.filter(pk__in=found_feed_ids)])

    for story in stories:
        if before and int(story['story_date'].strftime("%s")) > before:
            continue
        if after and int(story['story_date'].strftime("%s")) < after:
            continue
        score = 0
        if found_trained_feed_ids and story['story_feed_id'] in found_trained_feed_ids:
            score = compute_story_score(story, classifier_titles=classifier_titles,
                                        classifier_authors=classifier_authors,
                                        classifier_tags=classifier_tags,
                                        classifier_feeds=classifier_feeds)
        if score < 0:
            continue
        if trigger_slug == "new-unread-focus-story" and score < 1:
            continue
        feed = feeds.get(story['story_feed_id'], None)
        entries.append({
            "StoryTitle": story['story_title'],
            "StoryContent": story['story_content'],
            "StoryURL": story['story_permalink'],
            "StoryAuthor": story['story_authors'],
            "PublishedAt": story['story_date'].strftime("%Y-%m-%dT%H:%M:%SZ"),
            "StoryScore": score,
            "Site": feed and feed['title'],
            "SiteURL": feed and feed['website'],
            "SiteRSS": feed and feed['address'],
            "ifttt": {
                "id": story['story_hash'],
                "timestamp": int(story['story_date'].strftime("%s"))
            },
        })

    if after:
        entries = sorted(entries, key=lambda s: s['ifttt']['timestamp'])

    logging.user(request, "~FYChecking unread%s stories with ~SB~FCIFTTT~SN~FY: ~SB%s~SN - ~SB%s~SN stories" % (
        " ~SBfocus~SN" if trigger_slug == "new-unread-focus-story" else "",
        feed_or_folder, len(entries)))

    return {"data": entries[:limit]}
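# A hedged sketch of the flat_folders shape api_unread_story assumes:
# flatten_folders() appears to map each folder title to the feed ids inside
# it, with top-level feeds filed under " " (hence the "Top Level" rewrite
# above). The literal below is invented for illustration.
flat_folders_example = {
    " ": [101, 102],         # top-level feeds
    "Tech": [201, 202, 203],
    "Tech - Linux": [203],   # nested folder, flattened
}
assert flat_folders_example.get("Tech") == [201, 202, 203]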
def load_river_stories(request):
    limit = 18
    offset = 0
    start = datetime.datetime.utcnow()
    user = get_user(request)
    feed_ids = [int(feed_id) for feed_id in request.REQUEST.getlist('feeds') if feed_id]
    original_feed_ids = list(feed_ids)
    page = int(request.REQUEST.get('page', 1))
    read_stories_count = int(request.REQUEST.get('read_stories_count', 0))
    new_flag = request.REQUEST.get('new_flag', False)
    bottom_delta = datetime.timedelta(days=settings.DAYS_OF_UNREAD)

    if not feed_ids:
        logging.user(request, "~FCLoading empty river stories: page %s" % (page))
        return dict(stories=[])

    # Fetch all stories at and before the page number.
    # Not a single page, because reading stories can move them up in the unread order.
    # `read_stories_count` is an optimization, works best when all 25 stories before have been read.
    limit = limit * page - read_stories_count

    # Read stories to exclude
    read_stories = MUserStory.objects(user_id=user.pk, feed_id__in=feed_ids).only('story_id')
    read_stories = [rs.story_id for rs in read_stories]

    # Determine mark_as_read dates for all feeds to ignore all stories before this date.
    # max_feed_count = 0
    feed_counts = {}
    feed_last_reads = {}
    for feed_id in feed_ids:
        try:
            usersub = UserSubscription.objects.get(feed__pk=feed_id, user=user)
        except UserSubscription.DoesNotExist:
            continue
        if not usersub:
            continue
        feed_counts[feed_id] = (usersub.unread_count_negative * 1 +
                                usersub.unread_count_neutral * 10 +
                                usersub.unread_count_positive * 20)
        # if feed_counts[feed_id] > max_feed_count:
        #     max_feed_count = feed_counts[feed_id]
        feed_last_reads[feed_id] = int(time.mktime(usersub.mark_read_date.timetuple()))
    feed_counts = sorted(feed_counts.items(), key=itemgetter(1))[:50]
    feed_ids = [f[0] for f in feed_counts]
    feed_last_reads = dict([(str(feed_id), feed_last_reads[feed_id]) for feed_id in feed_ids
                            if feed_id in feed_last_reads])
    feed_counts = dict(feed_counts)

    # After excluding read stories, all that's left are stories
    # past the mark_read_date. Everything returned is guaranteed to be unread.
    mstories = MStory.objects(
        story_guid__nin=read_stories,
        story_feed_id__in=feed_ids,
        # story_date__gte=start - bottom_delta
    ).map_reduce("""function() {
            var d = feed_last_reads[this[~story_feed_id]];
            if (this[~story_date].getTime()/1000 > d) {
                emit(this[~id], this);
            }
        }""",
        """function(key, values) {
            return values[0];
        }""",
        output='inline',
        scope={
            'feed_last_reads': feed_last_reads
        }
    )
    mstories = [story.value for story in mstories if story and story.value]

    mstories = sorted(mstories, cmp=lambda x, y: cmp(story_score(y, bottom_delta), story_score(x, bottom_delta)))

    # story_feed_counts = defaultdict(int)
    # mstories_pruned = []
    # for story in mstories:
    #     print story['story_title'], story_feed_counts[story['story_feed_id']]
    #     if story_feed_counts[story['story_feed_id']] >= 3: continue
    #     mstories_pruned.append(story)
    #     story_feed_counts[story['story_feed_id']] += 1

    stories = []
    for i, story in enumerate(mstories):
        if i < offset: continue
        if i >= offset + limit: break
        stories.append(bunch(story))
    stories = Feed.format_stories(stories)
    found_feed_ids = list(set([story['story_feed_id'] for story in stories]))

    # Find starred stories
    starred_stories = MStarredStory.objects(
        user_id=user.pk,
        story_feed_id__in=found_feed_ids
    ).only('story_guid', 'starred_date')
    starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])

    # Intelligence classifiers for all feeds involved
    def sort_by_feed(classifiers):
        feed_classifiers = defaultdict(list)
        for classifier in classifiers:
            feed_classifiers[classifier.feed_id].append(classifier)
        return feed_classifiers
    classifier_feeds = sort_by_feed(MClassifierFeed.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_authors = sort_by_feed(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_titles = sort_by_feed(MClassifierTitle.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_tags = sort_by_feed(MClassifierTag.objects(user_id=user.pk, feed_id__in=found_feed_ids))

    classifiers = {}
    for feed_id in found_feed_ids:
        classifiers[feed_id] = get_classifiers_for_user(user, feed_id,
                                                        classifier_feeds[feed_id],
                                                        classifier_authors[feed_id],
                                                        classifier_titles[feed_id],
                                                        classifier_tags[feed_id])

    # Just need to format stories
    for story in stories:
        story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(story_date, now)
        story['long_parsed_date'] = format_story_link_date__long(story_date, now)
        story['read_status'] = 0
        if story['id'] in starred_stories:
            story['starred'] = True
            starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
            story['starred_date'] = format_story_link_date__long(starred_date, now)
        story['intelligence'] = {
            'feed': apply_classifier_feeds(classifier_feeds[story['story_feed_id']], story['story_feed_id']),
            'author': apply_classifier_authors(classifier_authors[story['story_feed_id']], story),
            'tags': apply_classifier_tags(classifier_tags[story['story_feed_id']], story),
            'title': apply_classifier_titles(classifier_titles[story['story_feed_id']], story),
        }

    diff = datetime.datetime.utcnow() - start
    timediff = float("%s.%.2s" % (diff.seconds, (diff.microseconds / 1000)))
    logging.user(request, "~FCLoading river stories: page %s - ~SB%s/%s "
                          "stories ~SN(%s/%s/%s feeds) ~FB(%s seconds)" %
                          (page, len(stories), len(mstories), len(found_feed_ids),
                           len(feed_ids), len(original_feed_ids), timediff))

    if new_flag:
        return dict(stories=stories, classifiers=classifiers)
    else:
        logging.user(request, "~BR~FCNo new flag on river")
        return dict(stories=stories)
def get_stories(self, offset=0, limit=6, order='newest', read_filter='all',
                withscores=False, hashes_only=False):
    r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
    ignore_user_stories = False

    stories_key = 'F:%s' % (self.feed_id)
    read_stories_key = 'RS:%s:%s' % (self.user_id, self.feed_id)
    unread_stories_key = 'U:%s:%s' % (self.user_id, self.feed_id)

    unread_ranked_stories_key = 'z%sU:%s:%s' % ('h' if hashes_only else '',
                                                self.user_id, self.feed_id)
    if offset and not withscores and r.exists(unread_ranked_stories_key):
        pass
    else:
        r.delete(unread_ranked_stories_key)
        if not r.exists(stories_key):
            print " ---> No stories on feed: %s" % self
            return []
        elif read_filter != 'unread' or not r.exists(read_stories_key):
            ignore_user_stories = True
            unread_stories_key = stories_key
        else:
            r.sdiffstore(unread_stories_key, stories_key, read_stories_key)
        sorted_stories_key = 'zF:%s' % (self.feed_id)
        r.zinterstore(unread_ranked_stories_key, [sorted_stories_key, unread_stories_key])

    current_time = int(time.time() + 60 * 60 * 24)
    if order == 'oldest':
        byscorefunc = r.zrangebyscore
        if read_filter == 'unread':
            min_score = int(time.mktime(self.mark_read_date.timetuple())) + 1
        else:
            now = datetime.datetime.now()
            two_weeks_ago = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
            min_score = int(time.mktime(two_weeks_ago.timetuple())) - 1000
        max_score = current_time
    else:
        byscorefunc = r.zrevrangebyscore
        min_score = current_time
        if read_filter == 'unread':
            # +1 for the intersection b/w zF and F, which carries an implicit score of 1.
            max_score = int(time.mktime(self.mark_read_date.timetuple())) + 1
        else:
            max_score = 0

    if settings.DEBUG:
        debug_stories = r.zrevrange(unread_ranked_stories_key, 0, -1, withscores=True)
        print " ---> Unread all stories (%s - %s) %s stories: %s" % (
            min_score, max_score, len(debug_stories), debug_stories)

    story_ids = byscorefunc(unread_ranked_stories_key, min_score, max_score,
                            start=offset, num=500, withscores=withscores)[:limit]
    r.expire(unread_ranked_stories_key, 1 * 60 * 60)
    if not ignore_user_stories:
        r.delete(unread_stories_key)

    if withscores or hashes_only:
        return story_ids
    elif story_ids:
        story_date_order = "%sstory_date" % ('' if order == 'oldest' else '-')
        mstories = MStory.objects(story_hash__in=story_ids).order_by(story_date_order)
        stories = Feed.format_stories(mstories)
        return stories
    else:
        return []
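# A hedged sketch of the score bounds in get_stories for the
# read_filter='unread' case. Because zinterstore's SUM aggregate adds the
# unread set's implicit score of 1 to every date, the unread window starts at
# mark_read_date + 1. Note also that for 'newest' the function is
# zrevrangebyscore, whose positional arguments are (max, min), so the
# variables named min_score/max_score above are passed in that flipped order.
# Illustrative helper only.
import time

def unread_score_bounds(order, mark_read_date, now_epoch):
    day = 60 * 60 * 24
    floor = int(time.mktime(mark_read_date.timetuple())) + 1
    if order == 'oldest':
        return floor, now_epoch + day   # (min, max) for zrangebyscore
    return now_epoch + day, floor       # (max, min) for zrevrangebyscore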
def calculate_feed_scores(self, silent=False, stories=None, force=False):
    # now = datetime.datetime.strptime("2009-07-06 22:30:03", "%Y-%m-%d %H:%M:%S")
    now = datetime.datetime.now()
    UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

    if self.user.profile.last_seen_on < UNREAD_CUTOFF and not force:
        # if not silent:
        #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
        return

    # if not self.feed.fetched_once:
    #     if not silent:
    #         logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
    #     self.needs_unread_recalc = False
    #     self.save()
    #     return

    feed_scores = dict(negative=0, neutral=0, positive=0)

    # Two weeks in age. If mark_read_date is older, mark old stories as read.
    date_delta = UNREAD_CUTOFF
    if date_delta < self.mark_read_date:
        date_delta = self.mark_read_date
    else:
        self.mark_read_date = date_delta

    if not stories:
        stories = cache.get('S:%s' % self.feed_id)

    unread_story_hashes = self.get_stories(read_filter='unread', limit=500, hashes_only=True)

    if not stories:
        stories_db = MStory.objects(story_hash__in=unread_story_hashes)
        stories = Feed.format_stories(stories_db, self.feed_id)

    oldest_unread_story_date = now
    unread_stories = []
    for story in stories:
        if story['story_date'] < date_delta:
            continue
        if story['story_hash'] in unread_story_hashes:
            unread_stories.append(story)
            if story['story_date'] < oldest_unread_story_date:
                oldest_unread_story_date = story['story_date']

    # if not silent:
    #     logging.info(' ---> [%s] Format stories: %s' % (self.user, datetime.datetime.now() - now))

    classifier_feeds = list(MClassifierFeed.objects(user_id=self.user_id, feed_id=self.feed_id, social_user_id=0))
    classifier_authors = list(MClassifierAuthor.objects(user_id=self.user_id, feed_id=self.feed_id))
    classifier_titles = list(MClassifierTitle.objects(user_id=self.user_id, feed_id=self.feed_id))
    classifier_tags = list(MClassifierTag.objects(user_id=self.user_id, feed_id=self.feed_id))

    # if not silent:
    #     logging.info(' ---> [%s] Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))

    scores = {
        'feed': apply_classifier_feeds(classifier_feeds, self.feed),
    }

    for story in unread_stories:
        scores.update({
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        })

        max_score = max(scores['author'], scores['tags'], scores['title'])
        min_score = min(scores['author'], scores['tags'], scores['title'])
        if max_score > 0:
            feed_scores['positive'] += 1
        elif min_score < 0:
            feed_scores['negative'] += 1
        else:
            if scores['feed'] > 0:
                feed_scores['positive'] += 1
            elif scores['feed'] < 0:
                feed_scores['negative'] += 1
            else:
                feed_scores['neutral'] += 1

    # if not silent:
    #     logging.info(' ---> [%s] End classifiers: %s' % (self.user, datetime.datetime.now() - now))

    self.unread_count_positive = feed_scores['positive']
    self.unread_count_neutral = feed_scores['neutral']
    self.unread_count_negative = feed_scores['negative']
    self.unread_count_updated = datetime.datetime.now()
    self.oldest_unread_story_date = oldest_unread_story_date
    self.needs_unread_recalc = False
    self.save()

    if (self.unread_count_positive == 0 and
        self.unread_count_neutral == 0):
        self.mark_feed_read()

    if not silent:
        logging.user(self.user, '~FC~SNComputing scores: %s (~SB%s~SN/~SB%s~SN/~SB%s~SN)' % (self.feed, feed_scores['negative'], feed_scores['neutral'], feed_scores['positive']))

    return self

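# Distilled from the loop above: per-story classifier scores take precedence, and
# the feed-level classifier only decides stories that are otherwise neutral. A
# small sketch of that bucketing rule (function and argument names here are
# illustrative, not from the source):
def bucket_story(feed_score, author_score, tags_score, title_score):
    # Mirrors the max/min cascade in calculate_feed_scores().
    story_scores = (author_score, tags_score, title_score)
    if max(story_scores) > 0:
        return 'positive'
    if min(story_scores) < 0:
        return 'negative'
    if feed_score > 0:
        return 'positive'
    if feed_score < 0:
        return 'negative'
    return 'neutral'

assert bucket_story(0, 1, -1, 0) == 'positive'   # any story-level positive wins
assert bucket_story(-1, 0, 0, 0) == 'negative'   # feed score only breaks the tie
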
def load_social_stories(request, user_id, username=None):
    start = time.time()
    user = get_user(request)
    social_user_id = int(user_id)
    social_user = get_object_or_404(User, pk=social_user_id)
    offset = int(request.REQUEST.get('offset', 0))
    limit = int(request.REQUEST.get('limit', 6))
    page = request.REQUEST.get('page')
    order = request.REQUEST.get('order', 'newest')
    read_filter = request.REQUEST.get('read_filter', 'all')
    stories = []

    if page:
        offset = limit * (int(page) - 1)
    now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
    UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

    social_profile = MSocialProfile.get_user(social_user.pk)
    try:
        socialsub = MSocialSubscription.objects.get(user_id=user.pk, subscription_user_id=social_user_id)
    except MSocialSubscription.DoesNotExist:
        socialsub = None

    mstories = MSharedStory.objects(user_id=social_user.pk).order_by('-shared_date')[offset:offset+limit]
    stories = Feed.format_stories(mstories)

    if socialsub and (read_filter == 'unread' or order == 'oldest'):
        story_ids = socialsub.get_stories(order=order, read_filter=read_filter, offset=offset, limit=limit)
        story_date_order = "%sshared_date" % ('' if order == 'oldest' else '-')
        if story_ids:
            mstories = MSharedStory.objects(user_id=social_user.pk,
                                            story_db_id__in=story_ids).order_by(story_date_order)
            stories = Feed.format_stories(mstories)
    else:
        mstories = MSharedStory.objects(user_id=social_user.pk).order_by('-shared_date')[offset:offset+limit]
        stories = Feed.format_stories(mstories)

    if not stories:
        return dict(stories=[])

    checkpoint1 = time.time()

    stories, user_profiles = MSharedStory.stories_with_comments_and_profiles(stories, user.pk, check_all=True)

    story_feed_ids = list(set(s['story_feed_id'] for s in stories))
    usersubs = UserSubscription.objects.filter(user__pk=user.pk, feed__pk__in=story_feed_ids)
    usersubs_map = dict((sub.feed_id, sub) for sub in usersubs)
    unsub_feed_ids = list(set(story_feed_ids).difference(set(usersubs_map.keys())))
    unsub_feeds = Feed.objects.filter(pk__in=unsub_feed_ids)
    unsub_feeds = [feed.canonical(include_favicon=False) for feed in unsub_feeds]

    date_delta = UNREAD_CUTOFF
    if socialsub and date_delta < socialsub.mark_read_date:
        date_delta = socialsub.mark_read_date

    # Get intelligence classifier for user
    classifier_feeds = list(MClassifierFeed.objects(user_id=user.pk, social_user_id=social_user_id))
    classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, social_user_id=social_user_id))
    classifier_titles = list(MClassifierTitle.objects(user_id=user.pk, social_user_id=social_user_id))
    classifier_tags = list(MClassifierTag.objects(user_id=user.pk, social_user_id=social_user_id))
    # Merge with feed specific classifiers
    classifier_feeds = classifier_feeds + list(MClassifierFeed.objects(user_id=user.pk, feed_id__in=story_feed_ids))
    classifier_authors = classifier_authors + list(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=story_feed_ids))
    classifier_titles = classifier_titles + list(MClassifierTitle.objects(user_id=user.pk, feed_id__in=story_feed_ids))
    classifier_tags = classifier_tags + list(MClassifierTag.objects(user_id=user.pk, feed_id__in=story_feed_ids))

    checkpoint2 = time.time()

    story_ids = [story['id'] for story in stories]
    userstories_db = MUserStory.objects(user_id=user.pk,
                                        feed_id__in=story_feed_ids,
                                        story_id__in=story_ids).only('story_id')
    userstories = set(us.story_id for us in userstories_db)

    starred_stories = MStarredStory.objects(user_id=user.pk,
                                            story_feed_id__in=story_feed_ids,
                                            story_guid__in=story_ids).only('story_guid', 'starred_date')
    shared_stories = MSharedStory.objects(user_id=user.pk,
                                          story_feed_id__in=story_feed_ids,
                                          story_guid__in=story_ids)\
                                 .only('story_guid', 'shared_date', 'comments')
    starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])
    shared_stories = dict([(story.story_guid, dict(shared_date=story.shared_date, comments=story.comments))
                           for story in shared_stories])

    for story in stories:
        story['social_user_id'] = social_user_id
        story_feed_id = story['story_feed_id']
        # story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        shared_date = localtime_for_timezone(story['shared_date'], user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(shared_date, now)
        story['long_parsed_date'] = format_story_link_date__long(shared_date, now)

        if not socialsub:
            story['read_status'] = 1
        elif story['id'] in userstories:
            story['read_status'] = 1
        elif story['shared_date'] < date_delta:
            story['read_status'] = 1
        elif not usersubs_map.get(story_feed_id):
            story['read_status'] = 0
        elif not story.get('read_status') and story['story_date'] < usersubs_map[story_feed_id].mark_read_date:
            story['read_status'] = 1
        elif not story.get('read_status') and story['shared_date'] < date_delta:
            story['read_status'] = 1
        # elif not story.get('read_status') and socialsub and story['shared_date'] > socialsub.last_read_date:
        #     story['read_status'] = 0
        else:
            story['read_status'] = 0

        if story['id'] in starred_stories:
            story['starred'] = True
            starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
            story['starred_date'] = format_story_link_date__long(starred_date, now)
        if story['id'] in shared_stories:
            story['shared'] = True
            shared_date = localtime_for_timezone(shared_stories[story['id']]['shared_date'], user.profile.timezone)
            story['shared_date'] = format_story_link_date__long(shared_date, now)
            story['shared_comments'] = strip_tags(shared_stories[story['id']]['comments'])

        story['intelligence'] = {
            'feed': apply_classifier_feeds(classifier_feeds, story['story_feed_id'], social_user_id=social_user_id),
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        }

    classifiers = sort_classifiers_by_feed(user=user, feed_ids=story_feed_ids,
                                           classifier_feeds=classifier_feeds,
                                           classifier_authors=classifier_authors,
                                           classifier_titles=classifier_titles,
                                           classifier_tags=classifier_tags)

    if socialsub:
        socialsub.feed_opens += 1
        socialsub.save()

    diff1 = checkpoint1 - start
    diff2 = checkpoint2 - start
    # page comes in as a string (or None), so coerce before comparing.
    logging.user(request, "~FYLoading ~FMshared stories~FY: ~SB%s%s ~SN(~SB%.4ss/%.4ss~SN)" % (
        social_profile.title[:22], ('~SN/p%s' % page) if page and int(page) > 1 else '', diff1, diff2))

    return {
        "stories": stories,
        "user_profiles": user_profiles,
        "feeds": unsub_feeds,
        "classifiers": classifiers,
    }

def calculate_feed_scores(self, silent=False, stories_db=None):
    # now = datetime.datetime.strptime("2009-07-06 22:30:03", "%Y-%m-%d %H:%M:%S")
    now = datetime.datetime.now()
    UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

    if self.user.profile.last_seen_on < UNREAD_CUTOFF:
        # if not silent:
        #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
        return

    if not self.feed.fetched_once:
        if not silent:
            logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
        self.needs_unread_recalc = False
        self.save()
        return

    feed_scores = dict(negative=0, neutral=0, positive=0)

    # Two weeks in age. If mark_read_date is older, mark old stories as read.
    date_delta = UNREAD_CUTOFF
    if date_delta < self.mark_read_date:
        date_delta = self.mark_read_date
    else:
        self.mark_read_date = date_delta

    read_stories = MUserStory.objects(user_id=self.user_id,
                                      feed_id=self.feed_id,
                                      read_date__gte=self.mark_read_date)
    # if not silent:
    #     logging.info(' ---> [%s] Read stories: %s' % (self.user, datetime.datetime.now() - now))
    read_stories_ids = [us.story_id for us in read_stories]
    stories_db = stories_db or MStory.objects(story_feed_id=self.feed_id, story_date__gte=date_delta)
    # if not silent:
    #     logging.info(' ---> [%s] MStory: %s' % (self.user, datetime.datetime.now() - now))
    oldest_unread_story_date = now
    unread_stories_db = []
    for story in stories_db:
        if story.story_date < date_delta:
            continue
        if hasattr(story, 'story_guid') and story.story_guid not in read_stories_ids:
            unread_stories_db.append(story)
            if story.story_date < oldest_unread_story_date:
                oldest_unread_story_date = story.story_date
    stories = Feed.format_stories(unread_stories_db, self.feed_id)
    # if not silent:
    #     logging.info(' ---> [%s] Format stories: %s' % (self.user, datetime.datetime.now() - now))

    classifier_feeds = list(MClassifierFeed.objects(user_id=self.user_id, feed_id=self.feed_id, social_user_id=0))
    classifier_authors = list(MClassifierAuthor.objects(user_id=self.user_id, feed_id=self.feed_id))
    classifier_titles = list(MClassifierTitle.objects(user_id=self.user_id, feed_id=self.feed_id))
    classifier_tags = list(MClassifierTag.objects(user_id=self.user_id, feed_id=self.feed_id))

    # if not silent:
    #     logging.info(' ---> [%s] Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))

    scores = {
        'feed': apply_classifier_feeds(classifier_feeds, self.feed),
    }

    for story in stories:
        scores.update({
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        })

        max_score = max(scores['author'], scores['tags'], scores['title'])
        min_score = min(scores['author'], scores['tags'], scores['title'])
        if max_score > 0:
            feed_scores['positive'] += 1
        elif min_score < 0:
            feed_scores['negative'] += 1
        else:
            if scores['feed'] > 0:
                feed_scores['positive'] += 1
            elif scores['feed'] < 0:
                feed_scores['negative'] += 1
            else:
                feed_scores['neutral'] += 1

    # if not silent:
    #     logging.info(' ---> [%s] End classifiers: %s' % (self.user, datetime.datetime.now() - now))

    self.unread_count_positive = feed_scores['positive']
    self.unread_count_neutral = feed_scores['neutral']
    self.unread_count_negative = feed_scores['negative']
    self.unread_count_updated = datetime.datetime.now()
    self.oldest_unread_story_date = oldest_unread_story_date
    self.needs_unread_recalc = False
    self.save()

    if (self.unread_count_positive == 0 and
        self.unread_count_neutral == 0 and
        self.unread_count_negative == 0):
        self.mark_feed_read()

    if not silent:
        logging.info(' ---> [%s] Computing scores: %s (%s/%s/%s)' % (self.user, self.feed, feed_scores['negative'], feed_scores['neutral'], feed_scores['positive']))

    return self

def get_stories(self, offset=0, limit=6, order="newest", read_filter="all", withscores=False, hashes_only=False): r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL) ignore_user_stories = False stories_key = "F:%s" % (self.feed_id) read_stories_key = "RS:%s:%s" % (self.user_id, self.feed_id) unread_stories_key = "U:%s:%s" % (self.user_id, self.feed_id) unread_ranked_stories_key = "z%sU:%s:%s" % ("h" if hashes_only else "", self.user_id, self.feed_id) if offset and not withscores and r.exists(unread_ranked_stories_key): pass else: r.delete(unread_ranked_stories_key) if not r.exists(stories_key): # print " ---> No stories on feed: %s" % self return [] elif read_filter != "unread" or not r.exists(read_stories_key): ignore_user_stories = True unread_stories_key = stories_key else: r.sdiffstore(unread_stories_key, stories_key, read_stories_key) sorted_stories_key = "zF:%s" % (self.feed_id) r.zinterstore(unread_ranked_stories_key, [sorted_stories_key, unread_stories_key]) current_time = int(time.time() + 60 * 60 * 24) if order == "oldest": byscorefunc = r.zrangebyscore if read_filter == "unread": min_score = int(time.mktime(self.mark_read_date.timetuple())) + 1 else: now = datetime.datetime.now() two_weeks_ago = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD) min_score = int(time.mktime(two_weeks_ago.timetuple())) - 1000 max_score = current_time else: byscorefunc = r.zrevrangebyscore min_score = current_time if read_filter == "unread": # +1 for the intersection b/w zF and F, which carries an implicit score of 1. max_score = int(time.mktime(self.mark_read_date.timetuple())) + 1 else: max_score = 0 if settings.DEBUG and False: debug_stories = r.zrevrange(unread_ranked_stories_key, 0, -1, withscores=True) print " ---> Unread all stories (%s - %s) %s stories: %s" % ( min_score, max_score, len(debug_stories), debug_stories, ) story_ids = byscorefunc( unread_ranked_stories_key, min_score, max_score, start=offset, num=500, withscores=withscores )[:limit] r.expire(unread_ranked_stories_key, 1 * 60 * 60) if not ignore_user_stories: r.delete(unread_stories_key) if withscores: story_ids = [(s[0], int(s[1])) for s in story_ids] if withscores or hashes_only: return story_ids elif story_ids: story_date_order = "%sstory_date" % ("" if order == "oldest" else "-") mstories = MStory.objects(story_hash__in=story_ids).order_by(story_date_order) stories = Feed.format_stories(mstories) return stories else: return []
def calculate_feed_scores(self, silent=False, stories_db=None):
    # now = datetime.datetime.strptime("2009-07-06 22:30:03", "%Y-%m-%d %H:%M:%S")
    now = datetime.datetime.now()
    UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

    if self.user.profile.last_seen_on < UNREAD_CUTOFF:
        # if not silent:
        #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
        return

    if not self.feed.fetched_once:
        if not silent:
            logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
        self.needs_unread_recalc = False
        self.save()
        return

    feed_scores = dict(negative=0, neutral=0, positive=0)

    # Two weeks in age. If mark_read_date is older, mark old stories as read.
    date_delta = UNREAD_CUTOFF
    if date_delta < self.mark_read_date:
        date_delta = self.mark_read_date
    else:
        self.mark_read_date = date_delta

    read_stories = MUserStory.objects(user_id=self.user.pk,
                                      feed_id=self.feed.pk,
                                      read_date__gte=self.mark_read_date)
    # if not silent:
    #     logging.info(' ---> [%s] Read stories: %s' % (self.user, datetime.datetime.now() - now))
    read_stories_ids = []
    for us in read_stories:
        read_stories_ids.append(us.story_id)
    stories_db = stories_db or MStory.objects(story_feed_id=self.feed.pk, story_date__gte=date_delta)
    # if not silent:
    #     logging.info(' ---> [%s] MStory: %s' % (self.user, datetime.datetime.now() - now))
    oldest_unread_story_date = now
    unread_stories_db = []
    for story in stories_db:
        if story.story_date < date_delta:
            continue
        if hasattr(story, 'story_guid') and story.story_guid not in read_stories_ids:
            unread_stories_db.append(story)
            if story.story_date < oldest_unread_story_date:
                oldest_unread_story_date = story.story_date
    stories = Feed.format_stories(unread_stories_db, self.feed.pk)
    # if not silent:
    #     logging.info(' ---> [%s] Format stories: %s' % (self.user, datetime.datetime.now() - now))

    classifier_feeds = list(MClassifierFeed.objects(user_id=self.user.pk, feed_id=self.feed.pk))
    classifier_authors = list(MClassifierAuthor.objects(user_id=self.user.pk, feed_id=self.feed.pk))
    classifier_titles = list(MClassifierTitle.objects(user_id=self.user.pk, feed_id=self.feed.pk))
    classifier_tags = list(MClassifierTag.objects(user_id=self.user.pk, feed_id=self.feed.pk))

    # if not silent:
    #     logging.info(' ---> [%s] Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))

    scores = {
        'feed': apply_classifier_feeds(classifier_feeds, self.feed),
    }

    for story in stories:
        scores.update({
            'author' : apply_classifier_authors(classifier_authors, story),
            'tags'   : apply_classifier_tags(classifier_tags, story),
            'title'  : apply_classifier_titles(classifier_titles, story),
        })

        max_score = max(scores['author'], scores['tags'], scores['title'])
        min_score = min(scores['author'], scores['tags'], scores['title'])
        if max_score > 0:
            feed_scores['positive'] += 1
        elif min_score < 0:
            feed_scores['negative'] += 1
        else:
            if scores['feed'] > 0:
                feed_scores['positive'] += 1
            elif scores['feed'] < 0:
                feed_scores['negative'] += 1
            else:
                feed_scores['neutral'] += 1

    # if not silent:
    #     logging.info(' ---> [%s] End classifiers: %s' % (self.user, datetime.datetime.now() - now))

    self.unread_count_positive = feed_scores['positive']
    self.unread_count_neutral = feed_scores['neutral']
    self.unread_count_negative = feed_scores['negative']
    self.unread_count_updated = datetime.datetime.now()
    self.oldest_unread_story_date = oldest_unread_story_date
    self.needs_unread_recalc = False
    self.save()

    # if (self.unread_count_positive == 0 and
    #     self.unread_count_neutral == 0):
    #     self.mark_feed_read()

    cache.delete('usersub:%s' % self.user.id)

    if not silent:
        logging.info(' ---> [%s] Computing scores: %s (%s/%s/%s)' % (self.user, self.feed, feed_scores['negative'], feed_scores['neutral'], feed_scores['positive']))

    return

def calculate_feed_scores(self, silent=False, stories=None, force=False):
    # now = datetime.datetime.strptime("2009-07-06 22:30:03", "%Y-%m-%d %H:%M:%S")
    now = datetime.datetime.now()
    oldest_unread_story_date = now

    if self.user.profile.last_seen_on < self.user.profile.unread_cutoff and not force:
        # if not silent:
        #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
        return self

    ong = self.unread_count_negative
    ont = self.unread_count_neutral
    ops = self.unread_count_positive

    # if not self.feed.fetched_once:
    #     if not silent:
    #         logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
    #     self.needs_unread_recalc = False
    #     self.save()
    #     return

    feed_scores = dict(negative=0, neutral=0, positive=0)

    # Two weeks in age. If mark_read_date is older, mark old stories as read.
    date_delta = self.user.profile.unread_cutoff
    if date_delta < self.mark_read_date:
        date_delta = self.mark_read_date
    else:
        self.mark_read_date = date_delta

    if self.is_trained:
        if not stories:
            stories = cache.get('S:%s' % self.feed_id)

        unread_story_hashes = self.story_hashes(user_id=self.user_id, feed_ids=[self.feed_id],
                                                usersubs=[self],
                                                read_filter='unread', group_by_feed=False,
                                                cutoff_date=self.user.profile.unread_cutoff)

        if not stories:
            stories_db = MStory.objects(story_hash__in=unread_story_hashes)
            stories = Feed.format_stories(stories_db, self.feed_id)

        unread_stories = []
        for story in stories:
            if story['story_date'] < date_delta:
                continue
            if story['story_hash'] in unread_story_hashes:
                unread_stories.append(story)
                if story['story_date'] < oldest_unread_story_date:
                    oldest_unread_story_date = story['story_date']

        # if not silent:
        #     logging.info(' ---> [%s] Format stories: %s' % (self.user, datetime.datetime.now() - now))

        classifier_feeds = list(MClassifierFeed.objects(user_id=self.user_id, feed_id=self.feed_id, social_user_id=0))
        classifier_authors = list(MClassifierAuthor.objects(user_id=self.user_id, feed_id=self.feed_id))
        classifier_titles = list(MClassifierTitle.objects(user_id=self.user_id, feed_id=self.feed_id))
        classifier_tags = list(MClassifierTag.objects(user_id=self.user_id, feed_id=self.feed_id))

        if (not len(classifier_feeds) and
            not len(classifier_authors) and
            not len(classifier_titles) and
            not len(classifier_tags)):
            self.is_trained = False

        # if not silent:
        #     logging.info(' ---> [%s] Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))

        scores = {
            'feed': apply_classifier_feeds(classifier_feeds, self.feed),
        }

        for story in unread_stories:
            scores.update({
                'author' : apply_classifier_authors(classifier_authors, story),
                'tags'   : apply_classifier_tags(classifier_tags, story),
                'title'  : apply_classifier_titles(classifier_titles, story),
            })

            max_score = max(scores['author'], scores['tags'], scores['title'])
            min_score = min(scores['author'], scores['tags'], scores['title'])
            if max_score > 0:
                feed_scores['positive'] += 1
            elif min_score < 0:
                feed_scores['negative'] += 1
            else:
                if scores['feed'] > 0:
                    feed_scores['positive'] += 1
                elif scores['feed'] < 0:
                    feed_scores['negative'] += 1
                else:
                    feed_scores['neutral'] += 1
    else:
        unread_story_hashes = self.story_hashes(user_id=self.user_id, feed_ids=[self.feed_id],
                                                usersubs=[self],
                                                read_filter='unread', group_by_feed=False,
                                                include_timestamps=True,
                                                cutoff_date=self.user.profile.unread_cutoff)

        feed_scores['neutral'] = len(unread_story_hashes)
        if feed_scores['neutral']:
            oldest_unread_story_date = datetime.datetime.fromtimestamp(unread_story_hashes[-1][1])

    if not silent:
        logging.user(self.user, '~FBUnread count (~SB%s~SN%s): ~SN(~FC%s~FB/~FC%s~FB/~FC%s~FB) ~SBto~SN (~FC%s~FB/~FC%s~FB/~FC%s~FB)' % (self.feed_id, '/~FMtrained~FB' if self.is_trained else '', ong, ont, ops, feed_scores['negative'], feed_scores['neutral'], feed_scores['positive']))

    self.unread_count_positive = feed_scores['positive']
    self.unread_count_neutral = feed_scores['neutral']
    self.unread_count_negative = feed_scores['negative']
    self.unread_count_updated = datetime.datetime.now()
    self.oldest_unread_story_date = oldest_unread_story_date
    self.needs_unread_recalc = False
    self.save()

    if (self.unread_count_positive == 0 and
        self.unread_count_neutral == 0):
        self.mark_feed_read()

    if not silent:
        logging.user(self.user, '~FC~SNComputing scores: %s (~SB%s~SN/~SB%s~SN/~SB%s~SN)' % (self.feed, feed_scores['negative'], feed_scores['neutral'], feed_scores['positive']))

    self.trim_read_stories()

    return self

mstories = sorted(mstories, cmp=lambda x, y: cmp(story_score(y, bottom_delta), story_score(x, bottom_delta)))

# story_feed_counts = defaultdict(int)
# mstories_pruned = []
# for story in mstories:
#     print story['story_title'], story_feed_counts[story['story_feed_id']]
#     if story_feed_counts[story['story_feed_id']] >= 3: continue
#     mstories_pruned.append(story)
#     story_feed_counts[story['story_feed_id']] += 1

stories = []
for i, story in enumerate(mstories):
    if i < offset: continue
    if i >= offset + limit: break
    stories.append(bunch(story))
stories = Feed.format_stories(stories)
found_feed_ids = list(set([story['story_feed_id'] for story in stories]))

# Find starred stories
starred_stories = MStarredStory.objects(user_id=user.pk,
                                        story_feed_id__in=found_feed_ids
                                        ).only('story_guid', 'starred_date')
starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])

# Intelligence classifiers for all feeds involved
def sort_by_feed(classifiers):
    feed_classifiers = defaultdict(list)
    for classifier in classifiers:
        feed_classifiers[classifier.feed_id].append(classifier)
    return feed_classifiers

def calculate_feed_scores(self, silent=False, stories=None, force=False):
    # now = datetime.datetime.strptime("2009-07-06 22:30:03", "%Y-%m-%d %H:%M:%S")
    now = datetime.datetime.now()
    UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)
    oldest_unread_story_date = now

    if self.user.profile.last_seen_on < UNREAD_CUTOFF and not force:
        # if not silent:
        #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
        return self

    ong = self.unread_count_negative
    ont = self.unread_count_neutral
    ops = self.unread_count_positive

    # if not self.feed.fetched_once:
    #     if not silent:
    #         logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
    #     self.needs_unread_recalc = False
    #     self.save()
    #     return

    feed_scores = dict(negative=0, neutral=0, positive=0)

    # Two weeks in age. If mark_read_date is older, mark old stories as read.
    date_delta = UNREAD_CUTOFF
    if date_delta < self.mark_read_date:
        date_delta = self.mark_read_date
    else:
        self.mark_read_date = date_delta

    if self.is_trained:
        if not stories:
            stories = cache.get("S:%s" % self.feed_id)

        unread_story_hashes = self.get_stories(read_filter="unread", limit=500, hashes_only=True)

        if not stories:
            stories_db = MStory.objects(story_hash__in=unread_story_hashes)
            stories = Feed.format_stories(stories_db, self.feed_id)

        unread_stories = []
        for story in stories:
            if story["story_date"] < date_delta:
                continue
            if story["story_hash"] in unread_story_hashes:
                unread_stories.append(story)
                if story["story_date"] < oldest_unread_story_date:
                    oldest_unread_story_date = story["story_date"]

        # if not silent:
        #     logging.info(' ---> [%s] Format stories: %s' % (self.user, datetime.datetime.now() - now))

        classifier_feeds = list(
            MClassifierFeed.objects(user_id=self.user_id, feed_id=self.feed_id, social_user_id=0)
        )
        classifier_authors = list(MClassifierAuthor.objects(user_id=self.user_id, feed_id=self.feed_id))
        classifier_titles = list(MClassifierTitle.objects(user_id=self.user_id, feed_id=self.feed_id))
        classifier_tags = list(MClassifierTag.objects(user_id=self.user_id, feed_id=self.feed_id))

        if (
            not len(classifier_feeds)
            and not len(classifier_authors)
            and not len(classifier_titles)
            and not len(classifier_tags)
        ):
            self.is_trained = False

        # if not silent:
        #     logging.info(' ---> [%s] Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))

        scores = {"feed": apply_classifier_feeds(classifier_feeds, self.feed)}

        for story in unread_stories:
            scores.update(
                {
                    "author": apply_classifier_authors(classifier_authors, story),
                    "tags": apply_classifier_tags(classifier_tags, story),
                    "title": apply_classifier_titles(classifier_titles, story),
                }
            )

            max_score = max(scores["author"], scores["tags"], scores["title"])
            min_score = min(scores["author"], scores["tags"], scores["title"])
            if max_score > 0:
                feed_scores["positive"] += 1
            elif min_score < 0:
                feed_scores["negative"] += 1
            else:
                if scores["feed"] > 0:
                    feed_scores["positive"] += 1
                elif scores["feed"] < 0:
                    feed_scores["negative"] += 1
                else:
                    feed_scores["neutral"] += 1
    else:
        unread_story_hashes = self.get_stories(read_filter="unread", limit=500, hashes_only=True, withscores=True)
        feed_scores["neutral"] = len(unread_story_hashes)
        if feed_scores["neutral"]:
            oldest_unread_story_date = datetime.datetime.fromtimestamp(unread_story_hashes[-1][1])

    if not silent:
        logging.user(
            self.user,
            "~FBUnread count (~SB%s~SN%s): ~SN(~FC%s~FB/~FC%s~FB/~FC%s~FB) ~SBto~SN (~FC%s~FB/~FC%s~FB/~FC%s~FB)"
            % (
                self.feed_id,
                "/~FMtrained~FB" if self.is_trained else "",
                ong,
                ont,
                ops,
                feed_scores["negative"],
                feed_scores["neutral"],
                feed_scores["positive"],
            ),
        )

    self.unread_count_positive = feed_scores["positive"]
    self.unread_count_neutral = feed_scores["neutral"]
    self.unread_count_negative = feed_scores["negative"]
    self.unread_count_updated = datetime.datetime.now()
    self.oldest_unread_story_date = oldest_unread_story_date
    self.needs_unread_recalc = False
    self.save()

    if self.unread_count_positive == 0 and self.unread_count_neutral == 0:
        self.mark_feed_read()

    if not silent:
        logging.user(
            self.user,
            "~FC~SNComputing scores: %s (~SB%s~SN/~SB%s~SN/~SB%s~SN)"
            % (self.feed, feed_scores["negative"], feed_scores["neutral"], feed_scores["positive"]),
        )

    return self

def load_social_page(request, user_id, username=None, **kwargs):
    start = time.time()
    user = request.user
    social_user_id = int(user_id)
    social_user = get_object_or_404(User, pk=social_user_id)
    offset = int(request.REQUEST.get('offset', 0))
    limit = int(request.REQUEST.get('limit', 6))
    page = request.REQUEST.get('page')
    format = request.REQUEST.get('format', None)
    has_next_page = False
    feed_id = kwargs.get('feed_id') or request.REQUEST.get('feed_id')
    if page:
        offset = limit * (int(page) - 1)

    user_social_profile = None
    if user.is_authenticated():
        user_social_profile = MSocialProfile.get_user(user.pk)
    social_profile = MSocialProfile.get_user(social_user_id)

    params = dict(user_id=social_user.pk)
    if feed_id:
        params['story_feed_id'] = feed_id
    mstories = MSharedStory.objects(**params).order_by('-shared_date')[offset:offset+limit+1]
    stories = Feed.format_stories(mstories)
    if len(stories) > limit:
        has_next_page = True
        stories = stories[:-1]

    checkpoint1 = time.time()

    if not stories:
        params = {
            "user": user,
            "stories": [],
            "feeds": {},
            "social_user": social_user,
            "social_profile": social_profile,
            'user_social_profile': json.encode(user_social_profile and user_social_profile.page()),
        }
        template = 'social/social_page.xhtml'
        return render_to_response(template, params, context_instance=RequestContext(request))

    story_feed_ids = list(set(s['story_feed_id'] for s in stories))
    feeds = Feed.objects.filter(pk__in=story_feed_ids)
    feeds = dict((feed.pk, feed.canonical(include_favicon=False)) for feed in feeds)

    for story in stories:
        if story['story_feed_id'] in feeds:
            # Feed could have been deleted.
            story['feed'] = feeds[story['story_feed_id']]
        shared_date = localtime_for_timezone(story['shared_date'], social_user.profile.timezone)
        story['shared_date'] = shared_date

    stories, profiles = MSharedStory.stories_with_comments_and_profiles(stories, social_user.pk, check_all=True)

    checkpoint2 = time.time()

    if user.is_authenticated():
        for story in stories:
            if user.pk in story['shared_by_friends'] or user.pk in story['shared_by_public']:
                story['shared_by_user'] = True
                shared_story = MSharedStory.objects.get(user_id=user.pk,
                                                        story_feed_id=story['story_feed_id'],
                                                        story_guid=story['id'])
                story['user_comments'] = shared_story.comments

    stories = MSharedStory.attach_users_to_stories(stories, profiles)

    params = {
        'social_user'         : social_user,
        'stories'             : stories,
        'user_social_profile' : json.encode(user_social_profile and user_social_profile.page()),
        'social_profile'      : social_profile,
        'feeds'               : feeds,
        'user_profile'        : hasattr(user, 'profile') and user.profile,
        'has_next_page'       : has_next_page,
        'holzer_truism'       : random.choice(jennyholzer.TRUISMS)  # if not has_next_page else None
    }

    diff1 = checkpoint1 - start
    diff2 = checkpoint2 - start
    timediff = time.time() - start
    # page comes in as a string (or None), so coerce before comparing.
    logging.user(request, "~FYLoading ~FMsocial page~FY: ~SB%s%s ~SN(%.4s seconds, ~SB%.4s/%.4s~SN)" % (
        social_profile.title[:22], ('~SN/p%s' % page) if page and int(page) > 1 else '', timediff, diff1, diff2))

    if format == 'html':
        template = 'social/social_stories.xhtml'
    else:
        template = 'social/social_page.xhtml'

    return render_to_response(template, params, context_instance=RequestContext(request))

def api_shared_story(request):
    user = request.user
    body = request.body_json
    after = body.get('after', None)
    before = body.get('before', None)
    limit = body.get('limit', 50)
    fields = body.get('triggerFields')
    blurblog_user = fields['blurblog_user']
    entries = []

    if isinstance(blurblog_user, int) or blurblog_user.isdigit():
        social_user_ids = [int(blurblog_user)]
    elif blurblog_user == "all":
        socialsubs = MSocialSubscription.objects.filter(user_id=user.pk)
        social_user_ids = [ss.subscription_user_id for ss in socialsubs]

    mstories = MSharedStory.objects(
        user_id__in=social_user_ids
    ).order_by('-shared_date')[:limit]
    stories = Feed.format_stories(mstories)

    found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
    share_user_ids = list(set([story['user_id'] for story in stories]))
    users = dict([(u.pk, u.username)
                  for u in User.objects.filter(pk__in=share_user_ids).only('pk', 'username')])
    feeds = dict([(f.pk, {
        "title": f.feed_title,
        "website": f.feed_link,
        "address": f.feed_address,
    }) for f in Feed.objects.filter(pk__in=found_feed_ids)])

    classifier_feeds = list(MClassifierFeed.objects(user_id=user.pk, social_user_id__in=social_user_ids))
    classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, social_user_id__in=social_user_ids))
    classifier_titles = list(MClassifierTitle.objects(user_id=user.pk, social_user_id__in=social_user_ids))
    classifier_tags = list(MClassifierTag.objects(user_id=user.pk, social_user_id__in=social_user_ids))
    # Merge with feed specific classifiers
    classifier_feeds = classifier_feeds + list(MClassifierFeed.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_authors = classifier_authors + list(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_titles = classifier_titles + list(MClassifierTitle.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_tags = classifier_tags + list(MClassifierTag.objects(user_id=user.pk, feed_id__in=found_feed_ids))

    for story in stories:
        if before and int(story['shared_date'].strftime("%s")) > before:
            continue
        if after and int(story['shared_date'].strftime("%s")) < after:
            continue

        score = compute_story_score(story, classifier_titles=classifier_titles,
                                    classifier_authors=classifier_authors,
                                    classifier_tags=classifier_tags,
                                    classifier_feeds=classifier_feeds)
        if score < 0:
            continue

        feed = feeds.get(story['story_feed_id'], None)
        entries.append({
            "StoryTitle": story['story_title'],
            "StoryContent": story['story_content'],
            "StoryURL": story['story_permalink'],
            "StoryAuthor": story['story_authors'],
            "PublishedAt": story['story_date'].strftime("%Y-%m-%dT%H:%M:%SZ"),
            "StoryScore": score,
            "Comments": story['comments'],
            "Username": users.get(story['user_id']),
            "SharedAt": story['shared_date'].strftime("%Y-%m-%dT%H:%M:%SZ"),
            "Site": feed and feed['title'],
            "SiteURL": feed and feed['website'],
            "SiteRSS": feed and feed['address'],
            "meta": {
                "id": story['story_hash'],
                "timestamp": int(story['shared_date'].strftime("%s"))
            },
        })

    if after:
        entries = sorted(entries, key=lambda s: s['meta']['timestamp'])

    logging.user(request, "~FMChecking shared stories from ~SB~FCIFTTT~SN~FM: ~SB~FM%s~FM~SN - ~SB%s~SN stories" % (blurblog_user, len(entries)))

    return {"data": entries}

def load_river_blurblog(request):
    limit = 10
    start = time.time()
    user = get_user(request)
    social_user_ids = [int(uid) for uid in request.REQUEST.getlist("social_user_ids") if uid]
    original_user_ids = list(social_user_ids)
    page = int(request.REQUEST.get("page", 1))
    order = request.REQUEST.get("order", "newest")
    read_filter = request.REQUEST.get("read_filter", "unread")
    relative_user_id = request.REQUEST.get("relative_user_id", None)
    now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
    UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

    if not relative_user_id:
        relative_user_id = get_user(request).pk

    if not social_user_ids:
        socialsubs = MSocialSubscription.objects.filter(user_id=user.pk)
        social_user_ids = [s.subscription_user_id for s in socialsubs]

    offset = (page - 1) * limit
    limit = page * limit - 1

    story_ids, story_dates = MSocialSubscription.feed_stories(
        user.pk, social_user_ids, offset=offset, limit=limit, order=order, read_filter=read_filter
    )
    mstories = MStory.objects(id__in=story_ids)
    story_id_to_dates = dict(zip(story_ids, story_dates))

    def sort_stories_by_id(a, b):
        return int(story_id_to_dates[str(b.id)]) - int(story_id_to_dates[str(a.id)])

    sorted_mstories = sorted(mstories, cmp=sort_stories_by_id)
    stories = Feed.format_stories(sorted_mstories)
    for s, story in enumerate(stories):
        story["story_date"] = datetime.datetime.fromtimestamp(story_dates[s])
    stories, user_profiles = MSharedStory.stories_with_comments_and_profiles(stories, relative_user_id, check_all=True)

    story_feed_ids = list(set(s["story_feed_id"] for s in stories))
    usersubs = UserSubscription.objects.filter(user__pk=user.pk, feed__pk__in=story_feed_ids)
    usersubs_map = dict((sub.feed_id, sub) for sub in usersubs)
    unsub_feed_ids = list(set(story_feed_ids).difference(set(usersubs_map.keys())))
    unsub_feeds = Feed.objects.filter(pk__in=unsub_feed_ids)
    unsub_feeds = [feed.canonical(include_favicon=False) for feed in unsub_feeds]

    # Find starred stories
    if story_feed_ids:
        story_ids = [story["id"] for story in stories]
        starred_stories = MStarredStory.objects(user_id=user.pk, story_guid__in=story_ids).only(
            "story_guid", "starred_date"
        )
        starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])
        shared_stories = MSharedStory.objects(user_id=user.pk, story_guid__in=story_ids).only(
            "story_guid", "shared_date", "comments"
        )
        shared_stories = dict(
            [
                (story.story_guid, dict(shared_date=story.shared_date, comments=story.comments))
                for story in shared_stories
            ]
        )
        userstories_db = MUserStory.objects(user_id=user.pk, feed_id__in=story_feed_ids, story_id__in=story_ids).only(
            "story_id"
        )
        userstories = set(us.story_id for us in userstories_db)
    else:
        starred_stories = {}
        shared_stories = {}
        userstories = []

    # Intelligence classifiers for all feeds involved
    if story_feed_ids:
        classifier_feeds = list(MClassifierFeed.objects(user_id=user.pk, feed_id__in=story_feed_ids))
        classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=story_feed_ids))
        classifier_titles = list(MClassifierTitle.objects(user_id=user.pk, feed_id__in=story_feed_ids))
        classifier_tags = list(MClassifierTag.objects(user_id=user.pk, feed_id__in=story_feed_ids))
    else:
        classifier_feeds = []
        classifier_authors = []
        classifier_titles = []
        classifier_tags = []
    classifiers = sort_classifiers_by_feed(
        user=user,
        feed_ids=story_feed_ids,
        classifier_feeds=classifier_feeds,
        classifier_authors=classifier_authors,
        classifier_titles=classifier_titles,
        classifier_tags=classifier_tags,
    )

    # Just need to format stories
    for story in stories:
        if story["id"] in userstories:
            story["read_status"] = 1
        elif story["story_date"] < UNREAD_CUTOFF:
            story["read_status"] = 1
        else:
            story["read_status"] = 0
        story_date = localtime_for_timezone(story["story_date"], user.profile.timezone)
        story["short_parsed_date"] = format_story_link_date__short(story_date, now)
        story["long_parsed_date"] = format_story_link_date__long(story_date, now)
        if story["id"] in starred_stories:
            story["starred"] = True
            starred_date = localtime_for_timezone(starred_stories[story["id"]], user.profile.timezone)
            story["starred_date"] = format_story_link_date__long(starred_date, now)
        story["intelligence"] = {
            "feed": apply_classifier_feeds(classifier_feeds, story["story_feed_id"]),
            "author": apply_classifier_authors(classifier_authors, story),
            "tags": apply_classifier_tags(classifier_tags, story),
            "title": apply_classifier_titles(classifier_titles, story),
        }
        if story["id"] in shared_stories:
            story["shared"] = True
            shared_date = localtime_for_timezone(shared_stories[story["id"]]["shared_date"], user.profile.timezone)
            story["shared_date"] = format_story_link_date__long(shared_date, now)
            story["shared_comments"] = strip_tags(shared_stories[story["id"]]["comments"])

    diff = time.time() - start
    timediff = round(float(diff), 2)
    logging.user(
        request,
        "~FYLoading ~FCriver blurblogs stories~FY: ~SBp%s~SN (%s/%s "
        "stories, ~SN%s/%s/%s feeds)"
        % (page, len(stories), len(mstories), len(story_feed_ids), len(social_user_ids), len(original_user_ids)),
    )

    return {
        "stories": stories,
        "user_profiles": user_profiles,
        "feeds": unsub_feeds,
        "classifiers": classifiers,
        "elapsed_time": timediff,
    }

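# sort_stories_by_id() above relies on sorted()'s cmp= argument, which Python 3
# removed; the same newest-first ordering falls out of a key= function. A
# self-contained sketch (the namedtuple stands in for the MStory documents and
# the ids are hypothetical):
from collections import namedtuple

Story = namedtuple('Story', 'id')
story_id_to_dates = {'a1': 1356220800, 'a2': 1356134400}
mstories = [Story('a2'), Story('a1')]

sorted_mstories = sorted(mstories, key=lambda s: story_id_to_dates[str(s.id)], reverse=True)
assert [s.id for s in sorted_mstories] == ['a1', 'a2']
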
# Prune the river to only include a set number of stories per feed
# story_feed_counts = defaultdict(int)
# mstories_pruned = []
# for story in mstories:
#     print story['story_title'], story_feed_counts[story['story_feed_id']]
#     if story_feed_counts[story['story_feed_id']] >= 3: continue
#     mstories_pruned.append(story)
#     story_feed_counts[story['story_feed_id']] += 1

stories = []
for i, story in enumerate(mstories):
    if i < offset: continue
    if i >= limit: break
    stories.append(bunch(story))
stories = Feed.format_stories(stories)
found_feed_ids = list(set([story['story_feed_id'] for story in stories]))

# Find starred stories
try:
    starred_stories = MStarredStory.objects(user_id=user.pk,
                                            story_feed_id__in=found_feed_ids
                                            ).only('story_guid', 'starred_date')
    starred_stories = dict([(story.story_guid, story.starred_date) for story in starred_stories])
except OperationFailure:
    logging.info(" ***> Starred stories failure")
    starred_stories = {}

# Intelligence classifiers for all feeds involved

def get_stories(self, offset=0, limit=6, order='newest', read_filter='all', withscores=False,
                hashes_only=False, cutoff_date=None):
    r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
    rt = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_TEMP_POOL)
    ignore_user_stories = False

    stories_key = 'F:%s' % (self.feed_id)
    read_stories_key = 'RS:%s:%s' % (self.user_id, self.feed_id)
    unread_stories_key = 'U:%s:%s' % (self.user_id, self.feed_id)
    unread_ranked_stories_key = 'z%sU:%s:%s' % ('h' if hashes_only else '', self.user_id, self.feed_id)

    if withscores or not offset or not rt.exists(unread_ranked_stories_key):
        rt.delete(unread_ranked_stories_key)
        if not r.exists(stories_key):
            # print " ---> No stories on feed: %s" % self
            return []
        elif read_filter == 'all' or not r.exists(read_stories_key):
            ignore_user_stories = True
            unread_stories_key = stories_key
        else:
            r.sdiffstore(unread_stories_key, stories_key, read_stories_key)
        sorted_stories_key = 'zF:%s' % (self.feed_id)
        r.zinterstore(unread_ranked_stories_key, [sorted_stories_key, unread_stories_key])
        if not ignore_user_stories:
            r.delete(unread_stories_key)

        dump = r.dump(unread_ranked_stories_key)
        if dump:
            pipeline = rt.pipeline()
            pipeline.delete(unread_ranked_stories_key)
            pipeline.restore(unread_ranked_stories_key, 1*60*60*1000, dump)
            pipeline.execute()
            r.delete(unread_ranked_stories_key)

    current_time = int(time.time() + 60*60*24)
    if not cutoff_date:
        cutoff_date = datetime.datetime.now() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

    if order == 'oldest':
        byscorefunc = rt.zrangebyscore
        if read_filter == 'unread':
            min_score = int(time.mktime(self.mark_read_date.timetuple())) + 1
        else:
            min_score = int(time.mktime(cutoff_date.timetuple())) - 1000
        max_score = current_time
    else:
        byscorefunc = rt.zrevrangebyscore
        min_score = current_time
        if read_filter == 'unread':
            # +1 for the intersection b/w zF and F, which carries an implicit score of 1.
            max_score = int(time.mktime(self.mark_read_date.timetuple())) + 1
        else:
            max_score = 0

    if settings.DEBUG and False:
        debug_stories = rt.zrevrange(unread_ranked_stories_key, 0, -1, withscores=True)
        print " ---> Unread all stories (%s - %s) %s stories: %s" % (
            min_score, max_score, len(debug_stories), debug_stories)

    story_ids = byscorefunc(unread_ranked_stories_key, min_score, max_score,
                            start=offset, num=500, withscores=withscores)[:limit]

    if withscores:
        story_ids = [(s[0], int(s[1])) for s in story_ids]

    if withscores or hashes_only:
        return story_ids
    elif story_ids:
        story_date_order = "%sstory_date" % ('' if order == 'oldest' else '-')
        mstories = MStory.objects(story_hash__in=story_ids).order_by(story_date_order)
        stories = Feed.format_stories(mstories)
        return stories
    else:
        return []

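# This version hands the ranked unread key to a separate "temp" Redis with
# DUMP/RESTORE instead of recomputing it on every page. A minimal sketch of that
# copy pattern (assumptions: two local Redis databases stand in for the two
# connection pools; the key name and score are hypothetical):
import redis

r = redis.Redis(db=0)   # stands in for REDIS_STORY_HASH_POOL
rt = redis.Redis(db=1)  # stands in for REDIS_STORY_HASH_TEMP_POOL

r.zadd('zhU:7:42', {'42:aaaa': 1356134400})

dump = r.dump('zhU:7:42')  # opaque serialized value, or None if the key is missing
if dump:
    pipeline = rt.pipeline()
    pipeline.delete('zhU:7:42')  # RESTORE errors out if the key already exists
    pipeline.restore('zhU:7:42', 1 * 60 * 60 * 1000, dump)  # TTL is in milliseconds
    pipeline.execute()
    r.delete('zhU:7:42')  # the working copy now lives only on the temp server
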
def api_unread_story(request, trigger_slug=None):
    user = request.user
    body = request.body_json
    after = body.get('after', None)
    before = body.get('before', None)
    limit = body.get('limit', 50)
    fields = body.get('triggerFields')
    feed_or_folder = fields['feed_or_folder']
    entries = []

    if isinstance(feed_or_folder, int) or feed_or_folder.isdigit():
        feed_id = int(feed_or_folder)
        try:
            usersub = UserSubscription.objects.get(user=user, feed_id=feed_id)
        except UserSubscription.DoesNotExist:
            return dict(data=[])
        found_feed_ids = [feed_id]
        found_trained_feed_ids = [feed_id] if usersub.is_trained else []
        stories = usersub.get_stories(order="newest", read_filter="unread",
                                      offset=0, limit=limit,
                                      default_cutoff_date=user.profile.unread_cutoff)
    else:
        folder_title = feed_or_folder
        if folder_title == "Top Level":
            folder_title = " "
        usf = UserSubscriptionFolders.objects.get(user=user)
        flat_folders = usf.flatten_folders()
        feed_ids = None
        if folder_title != "all":
            feed_ids = flat_folders.get(folder_title)
        usersubs = UserSubscription.subs_for_feeds(user.pk, feed_ids=feed_ids, read_filter="unread")
        feed_ids = [sub.feed_id for sub in usersubs]
        params = {
            "user_id": user.pk,
            "feed_ids": feed_ids,
            "offset": 0,
            "limit": limit,
            "order": "newest",
            "read_filter": "unread",
            "usersubs": usersubs,
            "cutoff_date": user.profile.unread_cutoff,
        }
        story_hashes, unread_feed_story_hashes = UserSubscription.feed_stories(**params)
        mstories = MStory.objects(story_hash__in=story_hashes).order_by('-story_date')
        stories = Feed.format_stories(mstories)
        found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
        trained_feed_ids = [sub.feed_id for sub in usersubs if sub.is_trained]
        found_trained_feed_ids = list(set(trained_feed_ids) & set(found_feed_ids))

    if found_trained_feed_ids:
        classifier_feeds = list(MClassifierFeed.objects(user_id=user.pk, feed_id__in=found_trained_feed_ids))
        classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=found_trained_feed_ids))
        classifier_titles = list(MClassifierTitle.objects(user_id=user.pk, feed_id__in=found_trained_feed_ids))
        classifier_tags = list(MClassifierTag.objects(user_id=user.pk, feed_id__in=found_trained_feed_ids))

    feeds = dict([(f.pk, {
        "title": f.feed_title,
        "website": f.feed_link,
        "address": f.feed_address,
    }) for f in Feed.objects.filter(pk__in=found_feed_ids)])

    for story in stories:
        if before and int(story['story_date'].strftime("%s")) > before:
            continue
        if after and int(story['story_date'].strftime("%s")) < after:
            continue

        score = 0
        if found_trained_feed_ids and story['story_feed_id'] in found_trained_feed_ids:
            score = compute_story_score(story, classifier_titles=classifier_titles,
                                        classifier_authors=classifier_authors,
                                        classifier_tags=classifier_tags,
                                        classifier_feeds=classifier_feeds)
            if score < 0:
                continue
            if trigger_slug == "new-unread-focus-story" and score < 1:
                continue

        feed = feeds.get(story['story_feed_id'], None)
        entries.append({
            "StoryTitle": story['story_title'],
            "StoryContent": story['story_content'],
            "StoryURL": story['story_permalink'],
            "StoryAuthor": story['story_authors'],
            "PublishedAt": story['story_date'].strftime("%Y-%m-%dT%H:%M:%SZ"),
            "StoryScore": score,
            "Site": feed and feed['title'],
            "SiteURL": feed and feed['website'],
            "SiteRSS": feed and feed['address'],
            "meta": {
                "id": story['story_hash'],
                "timestamp": int(story['story_date'].strftime("%s"))
            },
        })

    if after:
        entries = sorted(entries, key=lambda s: s['meta']['timestamp'])

    logging.user(request, "~FYChecking unread%s stories with ~SB~FCIFTTT~SN~FY: ~SB%s~SN - ~SB%s~SN stories" % (" ~SBfocus~SN" if trigger_slug == "new-unread-focus-story" else "", feed_or_folder, len(entries)))

    return {"data": entries[:limit]}

def calculate_feed_scores(self, silent=False, stories=None):
    # now = datetime.datetime.strptime("2009-07-06 22:30:03", "%Y-%m-%d %H:%M:%S")
    now = datetime.datetime.now()
    UNREAD_CUTOFF = now - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

    if self.user.profile.last_seen_on < UNREAD_CUTOFF:
        # if not silent:
        #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
        return

    if not self.feed.fetched_once:
        if not silent:
            logging.info(" ---> [%s] NOT Computing scores: %s" % (self.user, self.feed))
        self.needs_unread_recalc = False
        self.save()
        return

    feed_scores = dict(negative=0, neutral=0, positive=0)

    # Two weeks in age. If mark_read_date is older, mark old stories as read.
    date_delta = UNREAD_CUTOFF
    if date_delta < self.mark_read_date:
        date_delta = self.mark_read_date
    else:
        self.mark_read_date = date_delta

    read_stories = MUserStory.objects(
        user_id=self.user_id, feed_id=self.feed_id, read_date__gte=self.mark_read_date
    )
    read_stories_ids = [us.story_id for us in read_stories]

    if not stories:
        stories_db = MStory.objects(story_feed_id=self.feed_id, story_date__gte=date_delta)
        stories = Feed.format_stories(stories_db, self.feed_id)

    oldest_unread_story_date = now
    unread_stories = []
    for story in stories:
        if story["story_date"] < date_delta:
            continue
        if story["id"] not in read_stories_ids:
            unread_stories.append(story)
            if story["story_date"] < oldest_unread_story_date:
                oldest_unread_story_date = story["story_date"]

    # if not silent:
    #     logging.info(' ---> [%s] Format stories: %s' % (self.user, datetime.datetime.now() - now))

    classifier_feeds = list(MClassifierFeed.objects(user_id=self.user_id, feed_id=self.feed_id, social_user_id=0))
    classifier_authors = list(MClassifierAuthor.objects(user_id=self.user_id, feed_id=self.feed_id))
    classifier_titles = list(MClassifierTitle.objects(user_id=self.user_id, feed_id=self.feed_id))
    classifier_tags = list(MClassifierTag.objects(user_id=self.user_id, feed_id=self.feed_id))

    # if not silent:
    #     logging.info(' ---> [%s] Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))

    scores = {"feed": apply_classifier_feeds(classifier_feeds, self.feed)}

    for story in unread_stories:
        scores.update(
            {
                "author": apply_classifier_authors(classifier_authors, story),
                "tags": apply_classifier_tags(classifier_tags, story),
                "title": apply_classifier_titles(classifier_titles, story),
            }
        )

        max_score = max(scores["author"], scores["tags"], scores["title"])
        min_score = min(scores["author"], scores["tags"], scores["title"])
        if max_score > 0:
            feed_scores["positive"] += 1
        elif min_score < 0:
            feed_scores["negative"] += 1
        else:
            if scores["feed"] > 0:
                feed_scores["positive"] += 1
            elif scores["feed"] < 0:
                feed_scores["negative"] += 1
            else:
                feed_scores["neutral"] += 1

    # if not silent:
    #     logging.info(' ---> [%s] End classifiers: %s' % (self.user, datetime.datetime.now() - now))

    self.unread_count_positive = feed_scores["positive"]
    self.unread_count_neutral = feed_scores["neutral"]
    self.unread_count_negative = feed_scores["negative"]
    self.unread_count_updated = datetime.datetime.now()
    self.oldest_unread_story_date = oldest_unread_story_date
    self.needs_unread_recalc = False
    self.save()

    if self.unread_count_positive == 0 and self.unread_count_neutral == 0 and self.unread_count_negative == 0:
        self.mark_feed_read()

    if not silent:
        logging.info(
            " ---> [%s] Computing scores: %s (%s/%s/%s)"
            % (self.user, self.feed, feed_scores["negative"], feed_scores["neutral"], feed_scores["positive"])
        )

    return self

def calculate_feed_scores(self, silent=False, stories_db=None):
    UNREAD_CUTOFF = datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

    if self.user.profile.last_seen_on < UNREAD_CUTOFF:
        # if not silent:
        #     logging.info(' ---> [%s] SKIPPING Computing scores: %s (1 week+)' % (self.user, self.feed))
        return

    if not self.feed.fetched_once:
        if not silent:
            logging.info(' ---> [%s] NOT Computing scores: %s' % (self.user, self.feed))
        self.needs_unread_recalc = False
        self.save()
        return

    if not silent:
        logging.info(' ---> [%s] Computing scores: %s' % (self.user, self.feed))

    feed_scores = dict(negative=0, neutral=0, positive=0)

    # Two weeks in age. If mark_read_date is older, mark old stories as read.
    date_delta = UNREAD_CUTOFF
    if date_delta < self.mark_read_date:
        date_delta = self.mark_read_date
    else:
        self.mark_read_date = date_delta

    read_stories = MUserStory.objects(user_id=self.user.pk,
                                      feed_id=self.feed.pk,
                                      read_date__gte=self.mark_read_date)
    # if not silent:
    #     logging.info(' ---> [%s] Read stories: %s' % (self.user, datetime.datetime.now() - now))
    read_stories_ids = []
    for us in read_stories:
        if hasattr(us.story, 'story_guid') and isinstance(us.story.story_guid, unicode):
            read_stories_ids.append(us.story.story_guid)
        elif hasattr(us.story, 'id') and isinstance(us.story.id, unicode):
            read_stories_ids.append(us.story.id)  # TODO: Remove me after migration from story.id->guid

    stories_db = stories_db or MStory.objects(story_feed_id=self.feed.pk, story_date__gte=date_delta)
    # if not silent:
    #     logging.info(' ---> [%s] MStory: %s' % (self.user, datetime.datetime.now() - now))
    unread_stories_db = []
    for story in stories_db:
        if story.story_date < date_delta:
            continue
        if hasattr(story, 'story_guid') and story.story_guid not in read_stories_ids:
            unread_stories_db.append(story)
        elif isinstance(story.id, unicode) and story.id not in read_stories_ids:
            unread_stories_db.append(story)
    stories = Feed.format_stories(unread_stories_db, self.feed.pk)
    # if not silent:
    #     logging.info(' ---> [%s] Format stories: %s' % (self.user, datetime.datetime.now() - now))

    classifier_feeds = MClassifierFeed.objects(user_id=self.user.pk, feed_id=self.feed.pk)
    classifier_authors = MClassifierAuthor.objects(user_id=self.user.pk, feed_id=self.feed.pk)
    classifier_titles = MClassifierTitle.objects(user_id=self.user.pk, feed_id=self.feed.pk)
    classifier_tags = MClassifierTag.objects(user_id=self.user.pk, feed_id=self.feed.pk)

    # if not silent:
    #     logging.info(' ---> [%s] Classifiers: %s (%s)' % (self.user, datetime.datetime.now() - now, classifier_feeds.count() + classifier_authors.count() + classifier_tags.count() + classifier_titles.count()))

    scores = {
        'feed': apply_classifier_feeds(classifier_feeds, self.feed),
    }

    for story in stories:
        classifier_authors.rewind()
        classifier_tags.rewind()
        classifier_titles.rewind()
        scores.update({
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        })

        max_score = max(scores['author'], scores['tags'], scores['title'])
        min_score = min(scores['author'], scores['tags'], scores['title'])
        if max_score > 0:
            feed_scores['positive'] += 1
        elif min_score < 0:
            feed_scores['negative'] += 1
        else:
            if scores['feed'] > 0:
                feed_scores['positive'] += 1
            elif scores['feed'] < 0:
                feed_scores['negative'] += 1
            else:
                feed_scores['neutral'] += 1

    # if not silent:
    #     logging.info(' ---> [%s] End classifiers: %s' % (self.user, datetime.datetime.now() - now))

    self.unread_count_positive = feed_scores['positive']
    self.unread_count_neutral = feed_scores['neutral']
    self.unread_count_negative = feed_scores['negative']
    self.needs_unread_recalc = False
    self.save()

    if (self.unread_count_positive == 0 and
        self.unread_count_neutral == 0):
        self.mark_feed_read()

    cache.delete('usersub:%s' % self.user.id)

    return

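# The version above re-iterates the same classifier querysets once per story,
# calling rewind() to reset the underlying PyMongo cursors; the later versions
# wrap them in list(...) instead. The pitfall, sketched with a generator standing
# in for a cursor (names are illustrative):
def fake_cursor():
    yield 'classifier-a'
    yield 'classifier-b'

cursor = fake_cursor()
assert list(cursor) == ['classifier-a', 'classifier-b']
assert list(cursor) == []  # a second pass sees nothing -- hence rewind() above

classifiers = list(fake_cursor())  # materialize once, as the later versions do
assert list(classifiers) == list(classifiers)  # safe to re-iterate per story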