def feeds_trainer(request):
    """Build the trainer entries for the requesting user's active subscriptions.

    When the request carries a ``feed_id`` only that feed's subscription is
    considered, and it is always included. Otherwise only subscriptions that
    are not yet trained and whose feed has ``stories_last_month > 0`` are
    included.

    Returns a list of dicts with the keys ``classifiers``, ``feed_id``,
    ``stories_last_month``, ``feed_tags`` and ``feed_authors``.
    """
    requested_feed_id = request.REQUEST.get('feed_id')
    user = get_user(request)

    subscriptions = UserSubscription.objects.filter(user=user, active=True)
    if requested_feed_id:
        # An unknown feed id ends the request with a 404.
        requested_feed = get_object_or_404(Feed, pk=requested_feed_id)
        subscriptions = subscriptions.filter(feed=requested_feed)
    subscriptions = subscriptions.select_related('feed')
    subscriptions = subscriptions.order_by('-feed__stories_last_month')

    trainer_entries = []
    for sub in subscriptions:
        untrained_with_stories = (not sub.is_trained
                                  and sub.feed.stories_last_month > 0)
        if not (untrained_with_stories or requested_feed_id):
            continue

        trainer_entries.append({
            'classifiers': get_classifiers_for_user(user, sub.feed.pk),
            'feed_id': sub.feed.pk,
            'stories_last_month': sub.feed.stories_last_month,
            # Popular tags/authors are stored JSON-encoded; empty means none.
            'feed_tags': (json.decode(sub.feed.data.popular_tags)
                          if sub.feed.data.popular_tags else []),
            'feed_authors': (json.decode(sub.feed.data.popular_authors)
                             if sub.feed.data.popular_authors else []),
        })

    logging.user(user, "~FGLoading Trainer: ~SB%s feeds" % (len(trainer_entries)))

    return trainer_entries
def exception_change_feed_address(request):
    """Point a feed at a new feed address on behalf of the requesting user.

    Reads ``feed_id`` and ``feed_address`` from POST. A feed flagged with a
    page or feed exception is repaired in place; any other feed is branched
    into a (possibly new) feed with the new address and the same link. The
    user's subscription is then switched to the resulting feed.

    Returns a dict with ``code`` (1 on change, -1 otherwise), ``feeds``
    (the canonical subscription keyed by the original feed's pk) and
    ``new_feed_id``.
    """
    feed_id = request.POST['feed_id']
    original_feed = get_object_or_404(Feed, pk=feed_id)
    feed = original_feed
    feed_address = request.POST['feed_address']
    code = -1

    feed_is_broken = feed.has_page_exception or feed.has_feed_exception
    if feed_is_broken:
        # Fix broken feed
        logging.user(request, "~FRFixing feed exception by address: ~SB%s~SN to ~SB%s" % (
            feed.feed_address, feed_address))
        feed.has_feed_exception = False
        feed.active = True
        feed.fetched_once = False
        feed.feed_address = feed_address
        feed.next_scheduled_update = datetime.datetime.utcnow()
        # save() hands back a feed when the new address collides with an
        # existing one (it is used below via its pk).
        duplicate_feed = feed.save()
        code = 1
        if duplicate_feed:
            feed = Feed.objects.get(pk=duplicate_feed.pk)
            feed.next_scheduled_update = datetime.datetime.utcnow()
            feed.has_feed_exception = False
            feed.active = True
            feed.save()
            # NOTE(review): both arguments are the same pk here, exactly as
            # in the previous revision of this code -- confirm whether the
            # original feed was meant to be merged instead.
            merge_feeds(feed.pk, feed.pk)
    else:
        # Branch good feed
        logging.user(request, "~FRBranching feed by address: ~SB%s~SN to ~SB%s" % (
            feed.feed_address, feed_address))
        feed, _ = Feed.objects.get_or_create(feed_address=feed_address,
                                             feed_link=feed.feed_link)
        if feed.pk != original_feed.pk:
            try:
                branch_root = original_feed.branch_from_feed or original_feed
            except Feed.DoesNotExist:
                branch_root = original_feed
            feed.branch_from_feed = branch_root
            feed.feed_address_locked = True
            feed.save()
            code = 1

    feed = feed.update()
    feed = Feed.objects.get(pk=feed.pk)

    subscription = UserSubscription.objects.get(user=request.user, feed=original_feed)
    if subscription:
        subscription.switch_feed(feed, original_feed)
        # Re-read the subscription now that it points at the new feed.
        subscription = UserSubscription.objects.get(user=request.user, feed=feed)

    subscription.calculate_feed_scores(silent=False)
    feed.update_all_statistics()

    classifiers = get_classifiers_for_user(subscription.user, subscription.feed.pk)
    canonical_sub = subscription.canonical(full=True, classifiers=classifiers)

    return {
        'code': code,
        'feeds': {original_feed.pk: canonical_sub},
        'new_feed_id': subscription.feed.pk,
    }
def load_single_feed(request, feed_id):
    """Return the full canonical form of a feed plus the user's classifiers.

    Responds with a 404 when no feed has the given primary key. The
    classifiers are attached to the canonical payload under ``classifiers``.
    """
    viewer = get_user(request)
    feed = get_object_or_404(Feed, pk=feed_id)
    user_classifiers = get_classifiers_for_user(viewer, feed_id=feed.pk)

    response = feed.canonical(full=True)
    response['classifiers'] = user_classifiers
    return response
def load_single_feed(request, feed_id):
    """Return a feed's full canonical payload with the user's classifiers.

    A missing feed results in a 404. The user's classifiers for the feed
    are stored in the payload under the ``classifiers`` key.
    """
    current_user = get_user(request)
    feed = get_object_or_404(Feed, pk=feed_id)
    feed_classifiers = get_classifiers_for_user(current_user, feed.pk)

    result = feed.canonical(full=True)
    result['classifiers'] = feed_classifiers
    return result
def get_classifiers_feed(request, feed_id):
    """Return the requesting user's classifiers for ``feed_id``.

    The result is a dict with a constant ``code`` of 0 and the classifiers
    under ``payload``.
    """
    user = get_user(request)
    return {
        'code': 0,
        'payload': get_classifiers_for_user(user, feed_id=feed_id),
    }
def get_classifiers_feed(request):
    """Return the requesting user's classifiers for the POSTed ``feed_id``.

    ``feed_id`` is required in POST and is converted with ``int``. The
    result is a dict with a constant ``code`` of 0 and the classifiers
    under ``payload``.
    """
    # Parse the feed id before resolving the user, as the lookup below
    # needs an integer id.
    requested_feed_id = int(request.POST['feed_id'])
    user = get_user(request)
    return {
        'code': 0,
        'payload': get_classifiers_for_user(user, requested_feed_id),
    }
def social_feed_trainer(request):
    """Build the single trainer entry for a social user's blurblog.

    The social user is identified by the ``user_id`` request parameter; a
    404 is returned when no ``User`` has that pk. The entry is the social
    profile's JSON form extended with the requesting user's classifiers,
    the follower count and empty tag/author lists.

    Returns a one-element list containing that entry.
    """
    social_user_id = request.REQUEST.get("user_id")
    profile = MSocialProfile.get_user(social_user_id)
    social_user = get_object_or_404(User, pk=social_user_id)
    user = get_user(request)

    # Refresh the profile's story count before serializing it.
    profile.count_stories()
    entry = profile.to_json()
    entry["classifiers"] = get_classifiers_for_user(user, social_user_id=entry["id"])
    entry["num_subscribers"] = profile.follower_count
    entry["feed_tags"] = []
    entry["feed_authors"] = []

    logging.user(user, "~FGLoading social trainer on ~SB%s: %s" % (
        social_user.username, profile.title))

    return [entry]
def get_feeds_trainer(request):
    """Build trainer entries for the user's untrained feeds that have stories.

    Looks at every subscription of ``request.user`` and keeps those that are
    not trained and whose feed has ``stories_last_month > 0``.

    Returns a list of dicts with the keys ``classifiers``, ``feed_id``,
    ``stories_last_month``, ``feed_tags`` and ``feed_authors``.
    """
    def decoded_or_empty(raw_json):
        # Popular tags/authors are stored JSON-encoded; empty means none.
        return json.decode(raw_json) if raw_json else []

    subscriptions = (UserSubscription.objects
                     .filter(user=request.user)
                     .select_related('feed')
                     .order_by('-feed__stories_last_month'))

    trainer_entries = []
    for sub in subscriptions:
        if sub.is_trained or not sub.feed.stories_last_month > 0:
            continue
        feed = sub.feed
        trainer_entries.append({
            'classifiers': get_classifiers_for_user(request.user, feed.pk),
            'feed_id': feed.pk,
            'stories_last_month': feed.stories_last_month,
            'feed_tags': decoded_or_empty(feed.popular_tags),
            'feed_authors': decoded_or_empty(feed.popular_authors),
        })

    logging.info(" ---> [%s] Loading Trainer: %s feeds" % (request.user, len(trainer_entries)))

    return trainer_entries
def feeds_trainer(request):
    """Collect trainer data for the requesting user's active subscriptions.

    With a ``feed_id`` request parameter, only that feed's subscription is
    looked at and it is always reported. Without it, a subscription is
    reported only when it is untrained and its feed has
    ``stories_last_month > 0``.

    Returns a list of dicts with the keys ``classifiers``, ``feed_id``,
    ``stories_last_month``, ``feed_tags`` and ``feed_authors``.
    """
    def decoded_or_empty(raw_json):
        # Popular tags/authors are stored JSON-encoded; empty means none.
        return json.decode(raw_json) if raw_json else []

    only_feed_id = request.REQUEST.get('feed_id')
    user = get_user(request)

    queryset = UserSubscription.objects.filter(user=user, active=True)
    if only_feed_id:
        # Unknown feed ids produce a 404 instead of an empty trainer.
        queryset = queryset.filter(feed=get_object_or_404(Feed, pk=only_feed_id))
    queryset = queryset.select_related('feed').order_by('-feed__stories_last_month')

    results = []
    for subscription in queryset:
        wanted = ((not subscription.is_trained
                   and subscription.feed.stories_last_month > 0)
                  or only_feed_id)
        if wanted:
            results.append({
                'classifiers': get_classifiers_for_user(user, subscription.feed.pk),
                'feed_id': subscription.feed.pk,
                'stories_last_month': subscription.feed.stories_last_month,
                'feed_tags': decoded_or_empty(subscription.feed.data.popular_tags),
                'feed_authors': decoded_or_empty(subscription.feed.data.popular_authors),
            })

    logging.user(user, "~FGLoading Trainer: ~SB%s feeds" % (len(results)))

    return results
def get_feeds_trainer(request):
    """Build trainer entries for the requesting user's active subscriptions.

    With a ``feed_id`` in POST, only that feed's subscription is considered
    and it is always included. Otherwise only untrained subscriptions whose
    feed has ``stories_last_month > 0`` are included.

    Returns a list of dicts with the keys ``classifiers``, ``feed_id``,
    ``stories_last_month``, ``feed_tags`` and ``feed_authors``.
    """
    def decoded_or_empty(raw_json):
        # Popular tags/authors are stored JSON-encoded; empty means none.
        return json.decode(raw_json) if raw_json else []

    posted_feed_id = request.POST.get("feed_id")
    user = get_user(request)

    subscriptions = UserSubscription.objects.filter(user=user, active=True)
    if posted_feed_id:
        # An unknown feed id ends the request with a 404.
        posted_feed = get_object_or_404(Feed, pk=posted_feed_id)
        subscriptions = subscriptions.filter(feed=posted_feed)
    subscriptions = subscriptions.select_related("feed").order_by("-feed__stories_last_month")

    trainer_entries = []
    for sub in subscriptions:
        untrained_with_stories = not sub.is_trained and sub.feed.stories_last_month > 0
        if not (untrained_with_stories or posted_feed_id):
            continue
        trainer_entries.append({
            "classifiers": get_classifiers_for_user(user, sub.feed.pk),
            "feed_id": sub.feed.pk,
            "stories_last_month": sub.feed.stories_last_month,
            "feed_tags": decoded_or_empty(sub.feed.popular_tags),
            "feed_authors": decoded_or_empty(sub.feed.popular_authors),
        })

    logging.info(" ---> [%s] ~FGLoading Trainer: ~SB%s feeds" % (user, len(trainer_entries)))

    return trainer_entries
# Intelligence classifiers for all feeds involved def sort_by_feed(classifiers): feed_classifiers = defaultdict(list) for classifier in classifiers: feed_classifiers[classifier.feed_id].append(classifier) return feed_classifiers classifier_feeds = sort_by_feed(MClassifierFeed.objects(user_id=user.pk, feed_id__in=found_feed_ids)) classifier_authors = sort_by_feed(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=found_feed_ids)) classifier_titles = sort_by_feed(MClassifierTitle.objects(user_id=user.pk, feed_id__in=found_feed_ids)) classifier_tags = sort_by_feed(MClassifierTag.objects(user_id=user.pk, feed_id__in=found_feed_ids)) classifiers = {} for feed_id in found_feed_ids: classifiers[feed_id] = get_classifiers_for_user(user, feed_id, classifier_feeds[feed_id], classifier_authors[feed_id], classifier_titles[feed_id], classifier_tags[feed_id]) # Just need to format stories for story in stories: story_date = localtime_for_timezone(story['story_date'], user.profile.timezone) now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone) story['short_parsed_date'] = format_story_link_date__short(story_date, now) story['long_parsed_date'] = format_story_link_date__long(story_date, now) story['read_status'] = 0 if story['id'] in starred_stories: story['starred'] = True starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone) story['starred_date'] = format_story_link_date__long(starred_date, now) story['intelligence'] = { 'feed': apply_classifier_feeds(classifier_feeds[story['story_feed_id']], story['story_feed_id']),
def exception_change_feed_address(request):
    """Point a feed at a new feed address on behalf of the requesting user.

    Reads ``feed_id`` and ``feed_address`` from POST. A feed that is not
    ``known_good`` and carries a page or feed exception is repaired in
    place; any other feed is branched into the feed matching the new
    address and the current link (created when it does not exist). The
    user's subscription is then moved to the resulting feed.

    Returns a dict with ``code``, the feed/page/push fetch histories of the
    original feed id and, unless the user has no matching subscription,
    ``feeds`` and ``new_feed_id``. ``code`` is -1 when nothing could be
    switched or when the resulting feed still has a feed exception.
    """
    feed_id = request.POST["feed_id"]
    original_feed = get_object_or_404(Feed, pk=feed_id)
    feed = original_feed
    feed_address = request.POST["feed_address"]
    timezone = request.user.profile.timezone
    code = -1

    def with_fetch_history(response):
        # Attach the original feed's fetch histories to a response dict.
        history = MFetchHistory.feed(feed_id, timezone=timezone)
        for key in ("feed_fetch_history", "page_fetch_history", "push_history"):
            response[key] = history[key]
        return response

    needs_fixing = not feed.known_good and (feed.has_page_exception or feed.has_feed_exception)
    if needs_fixing:
        # Fix broken feed
        logging.user(
            request,
            "~FRFixing feed exception by address: ~SB%s~SN to ~SB%s" % (feed.feed_address, feed_address),
        )
        feed.has_feed_exception = False
        feed.active = True
        feed.fetched_once = False
        feed.feed_address = feed_address
        duplicate_feed = feed.schedule_feed_fetch_immediately()
        code = 1
        if duplicate_feed:
            feed = Feed.objects.get(pk=duplicate_feed.pk)
            feed.schedule_feed_fetch_immediately()
            feed.has_feed_exception = False
            feed.active = True
            # save() returns a feed object; merge only when it is a
            # different feed than the one just saved.
            saved_feed = feed.save()
            if saved_feed.pk != feed.pk:
                merge_feeds(saved_feed.pk, feed.pk)
    else:
        # Branch good feed
        logging.user(
            request,
            "~FRBranching feed by address: ~SB%s~SN to ~SB%s" % (feed.feed_address, feed_address),
        )
        try:
            address_and_link_hash = Feed.generate_hash_address_and_link(feed_address, feed.feed_link)
            feed = Feed.objects.get(hash_address_and_link=address_and_link_hash)
        except Feed.DoesNotExist:
            feed = Feed.objects.create(feed_address=feed_address, feed_link=feed.feed_link)
        code = 1
        if feed.pk != original_feed.pk:
            try:
                branch_root = original_feed.branch_from_feed or original_feed
            except Feed.DoesNotExist:
                branch_root = original_feed
            feed.branch_from_feed = branch_root
            feed.feed_address_locked = True
            feed = feed.save()

    feed = feed.update()
    feed = Feed.get_by_id(feed.pk)

    try:
        subscription = UserSubscription.objects.get(user=request.user, feed=feed)
    except UserSubscription.DoesNotExist:
        # Not subscribed to the resulting feed yet: move the subscription
        # over from the original feed, if there is one.
        old_subscriptions = UserSubscription.objects.filter(user=request.user, feed=original_feed)
        if not old_subscriptions:
            return with_fetch_history({"code": -1})
        subscription = old_subscriptions[0]
        subscription.switch_feed(feed, original_feed)

    subscription.calculate_feed_scores(silent=False)
    feed.update_all_statistics()

    classifiers = get_classifiers_for_user(subscription.user, feed_id=subscription.feed_id)
    feeds = {original_feed.pk: subscription and subscription.canonical(full=True, classifiers=classifiers)}

    if feed and feed.has_feed_exception:
        code = -1

    return with_fetch_history({
        "code": code,
        "feeds": feeds,
        "new_feed_id": subscription.feed_id,
    })
def exception_change_feed_link(request):
    """Point a feed at a new site link on behalf of the requesting user.

    Reads ``feed_id`` and ``feed_link`` from POST. A feed with a page or
    feed exception is repaired in place when ``feedfinder`` finds a feed
    address for the new link; any other feed is branched into a (possibly
    new) feed with the same address and the new link. The user's
    subscription is then moved to the resulting feed.

    Returns a dict with ``code``, the feed/page/push fetch histories of the
    original feed id and, unless the user has no matching subscription,
    ``feeds`` and ``new_feed_id``. ``code`` is -1 when nothing was changed
    or when the resulting feed still has a feed exception.
    """
    feed_id = request.POST['feed_id']
    original_feed = get_object_or_404(Feed, pk=feed_id)
    feed = original_feed
    feed_link = request.POST['feed_link']
    timezone = request.user.profile.timezone
    code = -1

    def with_fetch_history(response):
        # Attach the original feed's fetch histories to a response dict.
        history = MFetchHistory.feed(feed_id, timezone=timezone)
        for key in ('feed_fetch_history', 'page_fetch_history', 'push_history'):
            response[key] = history[key]
        return response

    if feed.has_page_exception or feed.has_feed_exception:
        # Fix broken feed
        logging.user(request, "~FRFixing feed exception by link: ~SB%s~SN to ~SB%s" % (
            feed.feed_link, feed_link))
        feed_address = feedfinder.feed(feed_link)
        if feed_address:
            code = 1
            feed.has_page_exception = False
            feed.active = True
            feed.fetched_once = False
            feed.feed_link = feed_link
            feed.feed_address = feed_address
            duplicate_feed = feed.schedule_feed_fetch_immediately()
            if duplicate_feed:
                # Continue with the already existing feed instead.
                feed = Feed.objects.get(pk=duplicate_feed.pk)
                feed.schedule_feed_fetch_immediately()
                feed.has_page_exception = False
                feed.active = True
                feed.save()
    else:
        # Branch good feed
        logging.user(request, "~FRBranching feed by link: ~SB%s~SN to ~SB%s" % (
            feed.feed_link, feed_link))
        feed, _ = Feed.objects.get_or_create(feed_address=feed.feed_address,
                                             feed_link=feed_link)
        code = 1
        if feed.pk != original_feed.pk:
            try:
                branch_root = original_feed.branch_from_feed or original_feed
            except Feed.DoesNotExist:
                branch_root = original_feed
            feed.branch_from_feed = branch_root
            feed.feed_link_locked = True
            feed.save()

    feed = feed.update()
    feed = Feed.get_by_id(feed.pk)

    try:
        subscription = UserSubscription.objects.get(user=request.user, feed=feed)
    except UserSubscription.DoesNotExist:
        # Not subscribed to the resulting feed yet: move the subscription
        # over from the original feed, if there is one.
        old_subscriptions = UserSubscription.objects.filter(user=request.user, feed=original_feed)
        if not old_subscriptions:
            return with_fetch_history({'code': -1})
        subscription = old_subscriptions[0]
        subscription.switch_feed(feed, original_feed)

    subscription.calculate_feed_scores(silent=False)
    feed.update_all_statistics()
    classifiers = get_classifiers_for_user(subscription.user, feed_id=subscription.feed_id)

    if feed and feed.has_feed_exception:
        code = -1

    feeds = {
        original_feed.pk: subscription.canonical(full=True, classifiers=classifiers),
    }
    return with_fetch_history({
        'code': code,
        'feeds': feeds,
        'new_feed_id': subscription.feed_id,
    })
def exception_change_feed_link(request):
    """Point a feed at a new site link on behalf of the requesting user.

    Reads ``feed_id`` and ``feed_link`` from POST. A feed with a page or
    feed exception is repaired in place when ``feedfinder`` finds a feed
    address for the new link; any other feed is branched into a (possibly
    new) feed with the same address and the new link. The user's
    subscription is then moved to the resulting feed.

    Returns ``{"code": -1}`` when the user has no matching subscription,
    otherwise a dict with ``code``, ``feeds`` and ``new_feed_id``.
    """
    feed_id = request.POST["feed_id"]
    original_feed = get_object_or_404(Feed, pk=feed_id)
    feed = original_feed
    feed_link = request.POST["feed_link"]
    code = -1

    if feed.has_page_exception or feed.has_feed_exception:
        # Fix broken feed
        logging.user(
            request,
            "~FRFixing feed exception by link: ~SB%s~SN to ~SB%s" % (feed.feed_link, feed_link),
        )
        feed_address = feedfinder.feed(feed_link)
        if feed_address:
            code = 1
            feed.has_page_exception = False
            feed.active = True
            feed.fetched_once = False
            feed.feed_link = feed_link
            feed.feed_address = feed_address
            feed.next_scheduled_update = datetime.datetime.utcnow()
            duplicate_feed = feed.save()
            if duplicate_feed:
                # Continue with the already existing feed instead.
                feed = Feed.objects.get(pk=duplicate_feed.pk)
                feed.next_scheduled_update = datetime.datetime.utcnow()
                feed.has_page_exception = False
                feed.active = True
                feed.save()
    else:
        # Branch good feed
        logging.user(
            request,
            "~FRBranching feed by link: ~SB%s~SN to ~SB%s" % (feed.feed_link, feed_link),
        )
        feed, _ = Feed.objects.get_or_create(feed_address=feed.feed_address, feed_link=feed_link)
        if feed.pk != original_feed.pk:
            try:
                branch_root = original_feed.branch_from_feed or original_feed
            except Feed.DoesNotExist:
                branch_root = original_feed
            feed.branch_from_feed = branch_root
            feed.feed_link_locked = True
            feed.save()
            code = 1

    feed = feed.update()
    feed = Feed.get_by_id(feed.pk)

    try:
        subscription = UserSubscription.objects.get(user=request.user, feed=feed)
    except UserSubscription.DoesNotExist:
        # Not subscribed to the resulting feed yet: move the subscription
        # over from the original feed, if there is one.
        old_subscriptions = UserSubscription.objects.filter(user=request.user, feed=original_feed)
        if not old_subscriptions:
            return {"code": -1}
        subscription = old_subscriptions[0]
        subscription.switch_feed(feed, original_feed)

    subscription.calculate_feed_scores(silent=False)
    feed.update_all_statistics()
    classifiers = get_classifiers_for_user(subscription.user, feed_id=subscription.feed_id)

    if feed and feed.has_feed_exception:
        code = -1

    canonical_sub = subscription.canonical(full=True, classifiers=classifiers)
    return {
        "code": code,
        "feeds": {original_feed.pk: canonical_sub},
        "new_feed_id": subscription.feed_id,
    }
def exception_change_feed_link(request):
    """Point a feed at a new site link on behalf of the requesting user.

    Reads ``feed_id`` and ``feed_link`` from POST. A feed with a page or
    feed exception is repaired in place when ``feedfinder`` finds a feed
    address for the new link; any other feed is branched into a (possibly
    new) feed with the same address and the new link. The user's
    subscription to the original feed is then switched to the result.

    Returns a dict with ``code`` (1 on change, -1 otherwise), ``feeds``
    (the canonical subscription keyed by the original feed's pk) and
    ``new_feed_id``.
    """
    feed_id = request.POST['feed_id']
    original_feed = get_object_or_404(Feed, pk=feed_id)
    feed = original_feed
    feed_link = request.POST['feed_link']
    code = -1

    if feed.has_page_exception or feed.has_feed_exception:
        # Fix broken feed
        logging.user(request, "~FRFixing feed exception by link: ~SB%s~SN to ~SB%s" % (
            feed.feed_link, feed_link))
        feed_address = feedfinder.feed(feed_link)
        if feed_address:
            code = 1
            feed.has_page_exception = False
            feed.active = True
            feed.fetched_once = False
            feed.feed_link = feed_link
            feed.feed_address = feed_address
            feed.next_scheduled_update = datetime.datetime.utcnow()
            duplicate_feed = feed.save()
            if duplicate_feed:
                # Continue with the already existing feed instead.
                feed = Feed.objects.get(pk=duplicate_feed.pk)
                feed.next_scheduled_update = datetime.datetime.utcnow()
                feed.has_page_exception = False
                feed.active = True
                feed.save()
    else:
        # Branch good feed
        logging.user(request, "~FRBranching feed by link: ~SB%s~SN to ~SB%s" % (
            feed.feed_link, feed_link))
        feed, _ = Feed.objects.get_or_create(feed_address=feed.feed_address,
                                             feed_link=feed_link)
        if feed.pk != original_feed.pk:
            try:
                branch_root = original_feed.branch_from_feed or original_feed
            except Feed.DoesNotExist:
                branch_root = original_feed
            feed.branch_from_feed = branch_root
            feed.feed_link_locked = True
            feed.save()
            code = 1

    feed = feed.update()
    feed = Feed.objects.get(pk=feed.pk)

    subscription = UserSubscription.objects.get(user=request.user, feed=original_feed)
    if subscription:
        subscription.switch_feed(feed, original_feed)
        # Re-read the subscription now that it points at the new feed.
        subscription = UserSubscription.objects.get(user=request.user, feed=feed)

    subscription.calculate_feed_scores(silent=False)
    feed.update_all_statistics()

    classifiers = get_classifiers_for_user(subscription.user, subscription.feed.pk)
    canonical_sub = subscription.canonical(full=True, classifiers=classifiers)

    return {
        'code': code,
        'feeds': {original_feed.pk: canonical_sub},
        'new_feed_id': subscription.feed.pk,
    }
def load_single_feed(request):
    """Return one page of a feed's stories annotated for the requesting user.

    Request parameters: ``feed_id`` (required, non-zero), ``offset``,
    ``limit`` (default 30), ``page`` (when non-zero, overrides ``offset``
    with ``limit * page``), ``feed_address`` (used to find a duplicate feed
    when ``feed_id`` does not exist) and the GET flag ``force_update``.

    Each story gets parsed dates, a ``read_status``, starred information and
    an ``intelligence`` dict with the user's feed/author/tags/title scores.
    The subscription's ``feed_opens`` counter is incremented and the load
    time is recorded in ``FeedLoadtime``.

    Raises ``Http404`` when no feed can be found, and
    ``UserSubscription.DoesNotExist`` when the user is not subscribed.

    Returns a dict with ``stories``, ``feed_tags``, ``feed_authors``,
    ``classifiers``, ``last_update`` and ``feed_id``.
    """
    user = get_user(request)
    offset = int(request.REQUEST.get("offset", 0))
    limit = int(request.REQUEST.get("limit", 30))
    page = int(request.REQUEST.get("page", 0))
    if page:
        offset = limit * page
    feed_id = int(request.REQUEST.get("feed_id", 0))
    if feed_id == 0:
        raise Http404

    try:
        feed = Feed.objects.get(id=feed_id)
    except Feed.DoesNotExist:
        # Fall back to the feed registered as a duplicate of this address.
        feed_address = request.REQUEST.get("feed_address")
        dupe_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address)
        if dupe_feed:
            feed = dupe_feed[0].feed
        else:
            raise Http404

    force_update = request.GET.get("force_update", False)

    now = datetime.datetime.utcnow()
    stories = feed.get_stories(offset, limit)

    if force_update:
        feed.update(force_update)

    # Get intelligence classifier for user
    classifier_feeds = MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id)
    classifier_authors = MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id)
    classifier_titles = MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id)
    classifier_tags = MClassifierTag.objects(user_id=user.pk, feed_id=feed_id)

    usersub = UserSubscription.objects.get(user=user, feed=feed)
    userstories_db = MUserStory.objects(user_id=user.pk, feed_id=feed.pk,
                                        read_date__gte=usersub.mark_read_date)
    starred_stories = MStarredStory.objects(user_id=user.pk,
                                            story_feed_id=feed_id).only("story_guid", "starred_date")
    starred_stories = dict((story.story_guid, story.starred_date)
                           for story in starred_stories)

    # A set keeps the per-story "already read?" check below O(1).
    userstories = set()
    for us in userstories_db:
        if hasattr(us.story, "story_guid") and isinstance(us.story.story_guid, unicode):
            userstories.add(us.story.story_guid)
        elif hasattr(us.story, "id") and isinstance(us.story.id, unicode):
            userstories.add(us.story.id)  # TODO: Remove me after migration from story.id->guid

    for story in stories:
        # The classifier querysets are consumed once per story, so they are
        # rewound before every use.
        classifier_feeds.rewind()
        classifier_authors.rewind()
        classifier_tags.rewind()
        classifier_titles.rewind()
        story_date = localtime_for_timezone(story["story_date"], user.profile.timezone)
        story["short_parsed_date"] = format_story_link_date__short(story_date)
        story["long_parsed_date"] = format_story_link_date__long(story_date)
        if story["id"] in userstories:
            story["read_status"] = 1
        elif not story.get("read_status") and story["story_date"] < usersub.mark_read_date:
            story["read_status"] = 1
        elif not story.get("read_status") and story["story_date"] > usersub.last_read_date:
            story["read_status"] = 0
        if story["id"] in starred_stories:
            story["starred"] = True
            starred_date = localtime_for_timezone(starred_stories[story["id"]],
                                                  user.profile.timezone)
            story["starred_date"] = format_story_link_date__long(starred_date)
        story["intelligence"] = {
            "feed": apply_classifier_feeds(classifier_feeds, feed),
            "author": apply_classifier_authors(classifier_authors, story),
            "tags": apply_classifier_tags(classifier_tags, story),
            "title": apply_classifier_titles(classifier_titles, story),
        }

    # Intelligence
    feed_tags = json.decode(feed.popular_tags) if feed.popular_tags else []
    feed_authors = json.decode(feed.popular_authors) if feed.popular_authors else []
    classifiers = get_classifiers_for_user(
        user, feed_id, classifier_feeds, classifier_authors, classifier_titles, classifier_tags
    )

    usersub.feed_opens += 1
    usersub.save()

    diff = datetime.datetime.utcnow() - now
    # Elapsed seconds as a real float with millisecond resolution. Gluing
    # "<seconds>.<milliseconds>" together as a string loses the zero padding
    # of the millisecond part, which turned 5 ms into 0.5 seconds.
    timediff = round(diff.days * 86400 + diff.seconds + diff.microseconds / 1000000.0, 3)
    last_update = relative_timesince(feed.last_update)
    logging.info(" ---> [%s] ~FYLoading feed: ~SB%s ~SN(%s seconds)" % (request.user, feed, timediff))
    FeedLoadtime.objects.create(feed=feed, loadtime=timediff)

    data = dict(
        stories=stories,
        feed_tags=feed_tags,
        feed_authors=feed_authors,
        classifiers=classifiers,
        last_update=last_update,
        feed_id=feed.pk,
    )
    return data
def load_single_feed(request):
    """Return one page of a feed's stories annotated for the requesting user.

    Request parameters: ``feed_id`` (required, non-zero), ``offset``,
    ``limit`` (default 30), ``page`` (when non-zero, overrides ``offset``
    with ``limit * page``), ``feed_address`` (used to find a duplicate feed
    when ``feed_id`` does not exist) and the GET flag ``force_update``.

    A missing subscription is created on the fly. Each story gets a
    ``read_status`` and an ``intelligence`` dict with the user's
    feed/author/tags/title scores. The subscription's ``feed_opens``
    counter is incremented and the load time is recorded in
    ``FeedLoadtime``.

    Raises ``Http404`` when no feed can be found.

    Returns a dict with ``stories``, ``feed_tags``, ``feed_authors``,
    ``classifiers``, ``last_update`` and ``feed_id``.
    """
    user = get_user(request)
    offset = int(request.REQUEST.get('offset', 0))
    limit = int(request.REQUEST.get('limit', 30))
    page = int(request.REQUEST.get('page', 0))
    if page:
        offset = limit * page
    feed_id = int(request.REQUEST.get('feed_id', 0))
    if feed_id == 0:
        raise Http404

    try:
        feed = Feed.objects.get(id=feed_id)
    except Feed.DoesNotExist:
        # Fall back to the feed registered as a duplicate of this address.
        feed_address = request.REQUEST.get('feed_address')
        dupe_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address)
        if dupe_feed:
            feed = dupe_feed[0].feed
        else:
            raise Http404

    force_update = request.GET.get('force_update', False)

    now = datetime.datetime.utcnow()
    stories = feed.get_stories(offset, limit)

    if force_update:
        feed.update(force_update)

    # Get intelligence classifier for user
    classifier_feeds = MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id)
    classifier_authors = MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id)
    classifier_titles = MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id)
    classifier_tags = MClassifierTag.objects(user_id=user.pk, feed_id=feed_id)

    try:
        usersub = UserSubscription.objects.get(user=user, feed=feed)
    except UserSubscription.DoesNotExist:
        # FIXME: Why is this happening for `conesus` when logged into another account?!
        logging.info(" ***> [%s] UserSub DNE, creating: %s" % (user, feed))
        usersub = UserSubscription.objects.create(user=user, feed=feed)

    userstories_db = MUserStory.objects(user_id=user.pk, feed_id=feed.pk,
                                        read_date__gte=usersub.mark_read_date)
    # A set keeps the per-story "already read?" check below O(1).
    userstories = set()
    for us in userstories_db:
        if hasattr(us.story, 'story_guid') and isinstance(us.story.story_guid, unicode):
            userstories.add(us.story.story_guid)
        elif hasattr(us.story, 'id') and isinstance(us.story.id, unicode):
            userstories.add(us.story.id)  # TODO: Remove me after migration from story.id->guid

    for story in stories:
        # The classifier querysets are consumed once per story, so they are
        # rewound before every use.
        classifier_feeds.rewind()
        classifier_authors.rewind()
        classifier_tags.rewind()
        classifier_titles.rewind()
        if story['id'] in userstories:
            story['read_status'] = 1
        elif not story.get('read_status') and story['story_date'] < usersub.mark_read_date:
            story['read_status'] = 1
        elif not story.get('read_status') and story['story_date'] > usersub.last_read_date:
            story['read_status'] = 0
        story['intelligence'] = {
            'feed': apply_classifier_feeds(classifier_feeds, feed),
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        }

    # Intelligence
    feed_tags = json.decode(feed.popular_tags) if feed.popular_tags else []
    feed_authors = json.decode(feed.popular_authors) if feed.popular_authors else []
    classifiers = get_classifiers_for_user(user, feed_id, classifier_feeds,
                                           classifier_authors, classifier_titles, classifier_tags)

    usersub.feed_opens += 1
    usersub.save()

    diff = datetime.datetime.utcnow() - now
    # Elapsed seconds as a real float with millisecond resolution. Gluing
    # "<seconds>.<milliseconds>" together as a string loses the zero padding
    # of the millisecond part, which turned 5 ms into 0.5 seconds.
    timediff = round(diff.days * 86400 + diff.seconds + diff.microseconds / 1000000.0, 3)
    logging.info(" ---> [%s] Loading feed: %s (%s seconds)" % (request.user, feed, timediff))
    FeedLoadtime.objects.create(feed=feed, loadtime=timediff)

    last_update = relative_timesince(feed.last_update)

    data = dict(stories=stories,
                feed_tags=feed_tags,
                feed_authors=feed_authors,
                classifiers=classifiers,
                last_update=last_update,
                feed_id=feed.pk)
    return data
def load_single_feed(request, feed_id):
    """Return one page of a feed's stories annotated for the requesting user.

    Request parameters: ``offset``, ``limit`` (default 12), ``page``
    (default 1; when non-zero, overrides ``offset`` with
    ``limit * (page - 1)``) and ``feed_address`` (used to find a duplicate
    feed when ``feed_id`` does not exist; the requested id is then echoed
    back as ``dupe_feed_id``).

    Each story gets parsed dates, a ``read_status``, starred information and
    an ``intelligence`` dict. The subscription's ``feed_opens`` counter is
    incremented, timing checkpoints are logged and the total load time is
    recorded in ``FeedLoadtime``.

    Raises ``Http404`` when no feed can be found.
    """
    started_at = time.time()
    user = get_user(request)
    params = request.REQUEST
    offset = int(params.get('offset', 0))
    limit = int(params.get('limit', 12))
    page = int(params.get('page', 1))
    dupe_feed_id = None
    userstories_db = None

    if page:
        offset = limit * (page-1)
    if not feed_id:
        raise Http404

    try:
        feed = Feed.objects.get(id=feed_id)
    except Feed.DoesNotExist:
        # Fall back to the feed registered as a duplicate of this address.
        dupe_feed = DuplicateFeed.objects.filter(
            duplicate_address=request.REQUEST.get('feed_address'))
        if not dupe_feed:
            raise Http404
        feed = dupe_feed[0].feed
        dupe_feed_id = feed_id

    stories = feed.get_stories(offset, limit)

    # Get intelligence classifier for user
    classifier_feeds = list(MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id))
    classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id))
    classifier_titles = list(MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id))
    classifier_tags = list(MClassifierTag.objects(user_id=user.pk, feed_id=feed_id))

    checkpoint1 = time.time()

    usersub = UserSubscription.objects.get(user=user, feed=feed)
    userstories = []
    if usersub and stories:
        # Only look up read/starred state for the stories on this page.
        story_ids = [story['id'] for story in stories]
        userstories_db = MUserStory.objects(
            user_id=user.pk,
            feed_id=feed.pk,
            story_id__in=story_ids).only('story_id')
        starred_db = MStarredStory.objects(
            user_id=user.pk,
            story_feed_id=feed_id,
            story_guid__in=story_ids).only('story_guid', 'starred_date')
        starred_stories = dict([(starred.story_guid, starred.starred_date)
                                for starred in starred_db])
        userstories = set(us.story_id for us in userstories_db)

    checkpoint2 = time.time()

    for story in stories:
        story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(story_date, now)
        story['long_parsed_date'] = format_story_link_date__long(story_date, now)

        if not usersub:
            story['read_status'] = 1
        else:
            if story['id'] in userstories:
                story['read_status'] = 1
            elif not story.get('read_status') and story['story_date'] < usersub.mark_read_date:
                story['read_status'] = 1
            elif not story.get('read_status') and story['story_date'] > usersub.last_read_date:
                story['read_status'] = 0
            if story['id'] in starred_stories:
                story['starred'] = True
                starred_date = localtime_for_timezone(starred_stories[story['id']],
                                                      user.profile.timezone)
                story['starred_date'] = format_story_link_date__long(starred_date, now)

        story['intelligence'] = {
            'feed': apply_classifier_feeds(classifier_feeds, feed),
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        }

    checkpoint3 = time.time()

    # Intelligence
    feed_tags = json.decode(feed.data.popular_tags) if feed.data.popular_tags else []
    feed_authors = json.decode(feed.data.popular_authors) if feed.data.popular_authors else []
    classifiers = get_classifiers_for_user(user, feed_id, classifier_feeds,
                                           classifier_authors, classifier_titles, classifier_tags)

    if usersub:
        usersub.feed_opens += 1
        usersub.save()

    # All checkpoints are reported relative to the start of the request.
    diff1 = checkpoint1-started_at
    diff2 = checkpoint2-started_at
    diff3 = checkpoint3-started_at
    timediff = time.time()-started_at
    last_update = relative_timesince(feed.last_update)

    if page > 1:
        page_label = '~SN/p%s' % page
    else:
        page_label = ''
    if userstories_db:
        read_count = userstories_db.count() or '~SN0~SB'
    else:
        read_count = '~SN0~SB'
    logging.user(request, "~FYLoading feed: ~SB%s%s ~SN(%.4s seconds, ~SB%.4s/%.4s(%s)/%.4s~SN)" % (
        feed.feed_title[:32], page_label, timediff, diff1, diff2, read_count, diff3))
    FeedLoadtime.objects.create(feed=feed, loadtime=timediff)

    data = dict(stories=stories,
                feed_tags=feed_tags,
                feed_authors=feed_authors,
                classifiers=classifiers,
                last_update=last_update,
                feed_id=feed.pk)

    if dupe_feed_id:
        data['dupe_feed_id'] = dupe_feed_id
    if not usersub:
        data.update(feed.canonical())

    return data
def load_river_stories(request):
    """Return the unread "river" of stories across several of a user's feeds.

    Request parameters: ``feeds`` (list of feed ids), ``page`` (default 1),
    ``read_stories_count`` (default 0) and ``new_flag``.

    Stories the user has read, and stories older than each subscription's
    ``mark_read_date``, are excluded; the rest are ordered by
    ``story_score`` (highest first) and annotated with parsed dates, starred
    information and an ``intelligence`` dict. Only the 50 feeds with the
    lowest weighted unread counts are queried.

    Returns ``dict(stories=...)``, plus ``classifiers`` (keyed by feed id)
    when ``new_flag`` is set.
    """
    limit = 18
    offset = 0
    start = datetime.datetime.utcnow()
    user = get_user(request)
    feed_ids = [int(feed_id) for feed_id in request.REQUEST.getlist('feeds') if feed_id]
    original_feed_ids = list(feed_ids)
    page = int(request.REQUEST.get('page', 1))
    read_stories_count = int(request.REQUEST.get('read_stories_count', 0))
    new_flag = request.REQUEST.get('new_flag', False)
    bottom_delta = datetime.timedelta(days=settings.DAYS_OF_UNREAD)

    if not feed_ids:
        logging.user(request, "~FCLoading empty river stories: page %s" % (page))
        return dict(stories=[])

    # Fetch all stories at and before the page number.
    # Not a single page, because reading stories can move them up in the unread order.
    # `read_stories_count` is an optimization, works best when all 25 stories before have been read.
    limit = limit * page - read_stories_count

    # Read stories to exclude
    read_stories = MUserStory.objects(user_id=user.pk, feed_id__in=feed_ids).only('story_id')
    read_stories = [rs.story_id for rs in read_stories]

    # Determine mark_as_read dates for all feeds to ignore all stories before this date.
    feed_counts = {}
    feed_last_reads = {}
    for feed_id in feed_ids:
        try:
            usersub = UserSubscription.objects.get(feed__pk=feed_id, user=user)
        except UserSubscription.DoesNotExist:
            continue
        if not usersub:
            continue
        # Weighted unread count: positive stories weigh most.
        feed_counts[feed_id] = (usersub.unread_count_negative * 1 +
                                usersub.unread_count_neutral * 10 +
                                usersub.unread_count_positive * 20)
        feed_last_reads[feed_id] = int(time.mktime(usersub.mark_read_date.timetuple()))

    top_feed_counts = sorted(feed_counts.items(), key=itemgetter(1))[:50]
    feed_ids = [feed_id for feed_id, _ in top_feed_counts]
    # The map/reduce scope needs string keys.
    feed_last_reads = dict((str(feed_id), feed_last_reads[feed_id])
                           for feed_id in feed_ids if feed_id in feed_last_reads)

    # After excluding read stories, all that's left are stories
    # past the mark_read_date. Everything returned is guaranteed to be unread.
    mstories = MStory.objects(
        story_guid__nin=read_stories,
        story_feed_id__in=feed_ids,
    ).map_reduce("""function() { var d = feed_last_reads[this[~story_feed_id]]; if (this[~story_date].getTime()/1000 > d) { emit(this[~id], this); } }""",
        """function(key, values) { return values[0]; }""",
        output='inline',
        scope={
            'feed_last_reads': feed_last_reads
        }
    )
    mstories = [story.value for story in mstories if story and story.value]
    # Highest score first. A key function with reverse=True keeps the same
    # stable ordering as the former cmp-based sort while scoring each story
    # only once.
    mstories = sorted(mstories,
                      key=lambda story: story_score(story, bottom_delta),
                      reverse=True)

    stories = []
    for i, story in enumerate(mstories):
        if i < offset:
            continue
        # Also stops immediately when `limit` has become zero or negative.
        if i >= offset + limit:
            break
        stories.append(bunch(story))
    stories = Feed.format_stories(stories)
    found_feed_ids = list(set([story['story_feed_id'] for story in stories]))

    # Find starred stories
    starred_stories = MStarredStory.objects(
        user_id=user.pk,
        story_feed_id__in=found_feed_ids
    ).only('story_guid', 'starred_date')
    starred_stories = dict((story.story_guid, story.starred_date)
                           for story in starred_stories)

    # Intelligence classifiers for all feeds involved
    def sort_by_feed(classifiers):
        feed_classifiers = defaultdict(list)
        for classifier in classifiers:
            feed_classifiers[classifier.feed_id].append(classifier)
        return feed_classifiers
    classifier_feeds = sort_by_feed(MClassifierFeed.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_authors = sort_by_feed(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_titles = sort_by_feed(MClassifierTitle.objects(user_id=user.pk, feed_id__in=found_feed_ids))
    classifier_tags = sort_by_feed(MClassifierTag.objects(user_id=user.pk, feed_id__in=found_feed_ids))

    classifiers = {}
    for feed_id in found_feed_ids:
        classifiers[feed_id] = get_classifiers_for_user(user, feed_id, classifier_feeds[feed_id],
                                                        classifier_authors[feed_id],
                                                        classifier_titles[feed_id],
                                                        classifier_tags[feed_id])

    # Just need to format stories
    for story in stories:
        story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(story_date, now)
        story['long_parsed_date'] = format_story_link_date__long(story_date, now)
        story['read_status'] = 0
        if story['id'] in starred_stories:
            story['starred'] = True
            starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone)
            story['starred_date'] = format_story_link_date__long(starred_date, now)
        story['intelligence'] = {
            'feed': apply_classifier_feeds(classifier_feeds[story['story_feed_id']], story['story_feed_id']),
            'author': apply_classifier_authors(classifier_authors[story['story_feed_id']], story),
            'tags': apply_classifier_tags(classifier_tags[story['story_feed_id']], story),
            'title': apply_classifier_titles(classifier_titles[story['story_feed_id']], story),
        }

    diff = datetime.datetime.utcnow() - start
    # Elapsed seconds as a real float, rounded to two decimals. Gluing
    # "<seconds>.<milliseconds>" together as a string loses the zero padding
    # of the millisecond part, which turned 5 ms into 0.5 seconds.
    timediff = round(diff.days * 86400 + diff.seconds + diff.microseconds / 1000000.0, 2)
    logging.user(request, "~FCLoading river stories: page %s - ~SB%s/%s "
                          "stories ~SN(%s/%s/%s feeds) ~FB(%s seconds)" %
                          (page, len(stories), len(mstories), len(found_feed_ids),
                           len(feed_ids), len(original_feed_ids), timediff))

    if new_flag:
        return dict(stories=stories, classifiers=classifiers)
    else:
        logging.user(request, "~BR~FCNo new flag on river")
        return dict(stories=stories)
def load_single_feed(request, feed_id):
    """Return one page of a feed's stories, annotated for the requesting user.

    Request parameters (read from ``request.REQUEST``):
        offset: index of the first story; defaults to 0 and is overridden
            whenever ``page`` is non-zero.
        limit: number of stories per page; defaults to 12.
        page: 1-based page number; defaults to 1.
        feed_address: only consulted when no ``Feed`` has id ``feed_id``, to
            look the feed up through ``DuplicateFeed``.

    Returns:
        A dict with ``stories``, ``feed_tags``, ``feed_authors``,
        ``classifiers``, ``last_update`` and ``feed_id``. ``dupe_feed_id`` is
        added when the feed was resolved through a duplicate address, and the
        result of ``feed.canonical()`` is merged in when the user has no
        subscription to the feed.

    Raises:
        Http404: if ``feed_id`` is empty, or the feed cannot be found either
            by id or by duplicate address.
    """
    start = datetime.datetime.utcnow()
    user = get_user(request)
    offset = int(request.REQUEST.get('offset', 0))
    limit = int(request.REQUEST.get('limit', 12))
    page = int(request.REQUEST.get('page', 1))
    if page:
        offset = limit * (page-1)
    dupe_feed_id = None

    if not feed_id:
        raise Http404

    try:
        feed = Feed.objects.get(id=feed_id)
    except Feed.DoesNotExist:
        # The id may belong to a feed that no longer exists under that id;
        # fall back to a lookup by its recorded duplicate address.
        feed_address = request.REQUEST.get('feed_address')
        dupe_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address)
        if dupe_feed:
            feed = dupe_feed[0].feed
            dupe_feed_id = feed_id
        else:
            raise Http404

    stories = feed.get_stories(offset, limit)

    # Get intelligence classifier for user.
    # Materialized once so that every story, and get_classifiers_for_user
    # below, iterates the same results instead of rewinding each query per
    # story.
    # NOTE(review): these are keyed on the requested feed_id, not feed.pk,
    # which differ when the feed was resolved as a duplicate -- confirm.
    classifier_feeds = list(MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id))
    classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id))
    classifier_titles = list(MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id))
    classifier_tags = list(MClassifierTag.objects(user_id=user.pk, feed_id=feed_id))

    # A missing subscription is an expected case (handled by the
    # `if usersub` / `if not usersub` branches below), not an error.
    try:
        usersub = UserSubscription.objects.get(user=user, feed=feed)
    except UserSubscription.DoesNotExist:
        usersub = None

    userstories = set()  # ids/guids of stories the user has already read
    starred_stories = {}
    if usersub:
        userstories_db = MUserStory.objects(user_id=user.pk,
                                            feed_id=feed.pk,
                                            read_date__gte=usersub.mark_read_date)
        starred_stories = MStarredStory.objects(
            user_id=user.pk,
            story_feed_id=feed_id
        ).only('story_guid', 'starred_date')
        starred_stories = dict([(story.story_guid, story.starred_date)
                                for story in starred_stories])

        for us in userstories_db:
            if hasattr(us.story, 'story_guid') and isinstance(us.story.story_guid, unicode):
                userstories.add(us.story.story_guid)
            elif hasattr(us.story, 'id') and isinstance(us.story.id, unicode):
                userstories.add(us.story.id)  # TODO: Remove me after migration from story.id->guid

    for story in stories:
        story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(story_date, now)
        story['long_parsed_date'] = format_story_link_date__long(story_date, now)
        if usersub:
            if story['id'] in userstories:
                story['read_status'] = 1
            elif not story.get('read_status') and story['story_date'] < usersub.mark_read_date:
                story['read_status'] = 1
            elif not story.get('read_status') and story['story_date'] > usersub.last_read_date:
                story['read_status'] = 0
            if story['id'] in starred_stories:
                story['starred'] = True
                starred_date = localtime_for_timezone(starred_stories[story['id']],
                                                      user.profile.timezone)
                story['starred_date'] = format_story_link_date__long(starred_date, now)
        else:
            # Without a subscription every story is reported as read.
            story['read_status'] = 1
        story['intelligence'] = {
            'feed': apply_classifier_feeds(classifier_feeds, feed),
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        }

    # Intelligence
    feed_tags = json.decode(feed.data.popular_tags) if feed.data.popular_tags else []
    feed_authors = json.decode(feed.data.popular_authors) if feed.data.popular_authors else []
    classifiers = get_classifiers_for_user(user, feed_id, classifier_feeds,
                                           classifier_authors, classifier_titles, classifier_tags)

    if usersub:
        usersub.feed_opens += 1
        usersub.save()

    diff = datetime.datetime.utcnow() - start
    # Elapsed seconds rounded to hundredths. Computed arithmetically: gluing
    # "<seconds>.<milliseconds>" together as text drops the leading zeros of
    # the millisecond part (5 ms would be recorded as 0.5 s).
    timediff = round(diff.seconds + diff.microseconds / 1000000.0, 2)
    last_update = relative_timesince(feed.last_update)
    logging.user(request.user, "~FYLoading feed: ~SB%s%s ~SN(%s seconds)" % (
        feed, ('~SN/p%s' % page) if page > 1 else '', timediff))
    FeedLoadtime.objects.create(feed=feed, loadtime=timediff)

    data = dict(stories=stories,
                feed_tags=feed_tags,
                feed_authors=feed_authors,
                classifiers=classifiers,
                last_update=last_update,
                feed_id=feed.pk)

    if dupe_feed_id:
        data['dupe_feed_id'] = dupe_feed_id
    if not usersub:
        data.update(feed.canonical())

    return data
def load_single_feed(request):
    """Return one page of a feed's stories with read status and classifiers.

    Request parameters:
        feed_id (required, ``request.REQUEST``): id of the feed to load.
        offset: index of the first story; defaults to 0 and is overridden
            whenever ``page`` is non-zero.
        limit: number of stories per page; defaults to 30.
        page: 0-based page number; defaults to 0.
        force_update (``request.GET``): when truthy, ``feed.update`` is
            called with it after the stories have been fetched.

    A ``UserSubscription`` is created for the user if none exists, and its
    ``feed_opens`` counter is incremented on every call.

    Returns:
        A dict with ``stories``, ``feed_tags``, ``feed_authors`` and
        ``classifiers``.
    """
    user = get_user(request)
    offset = int(request.REQUEST.get('offset', 0))
    limit = int(request.REQUEST.get('limit', 30))
    page = int(request.REQUEST.get('page', 0))
    if page:
        offset = limit * page
    feed_id = int(request.REQUEST['feed_id'])
    feed = Feed.objects.get(id=feed_id)
    force_update = request.GET.get('force_update', False)

    now = datetime.datetime.now()  # start of the timed section
    stories = feed.get_stories(offset, limit)

    if force_update:
        feed.update(force_update)

    # Get intelligence classifier for user
    classifier_feeds = MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id)
    classifier_authors = MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id)
    classifier_titles = MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id)
    classifier_tags = MClassifierTag.objects(user_id=user.pk, feed_id=feed_id)

    try:
        usersub = UserSubscription.objects.get(user=user, feed=feed)
    except UserSubscription.DoesNotExist:
        # FIXME: Why is this happening for `conesus` when logged into another account?!
        logging.info(" ***> [%s] UserSub DNE, creating: %s" % (user, feed))
        usersub = UserSubscription.objects.create(user=user, feed=feed)

    userstories = MUserStory.objects(user_id=user.pk,
                                     feed_id=feed.pk,
                                     read_date__gte=usersub.mark_read_date)
    # A set gives constant-time membership checks in the per-story loop.
    read_story_ids = set([us.story.id for us in userstories])

    for story in stories:
        # Restart each classifier query so it can be iterated for this story.
        classifier_feeds.rewind()
        classifier_authors.rewind()
        classifier_tags.rewind()
        classifier_titles.rewind()
        if story.get('id') in read_story_ids:
            story['read_status'] = 1
        elif not story.get('read_status') and story['story_date'] < usersub.mark_read_date:
            story['read_status'] = 1
        elif not story.get('read_status') and story['story_date'] > usersub.last_read_date:
            story['read_status'] = 0
        story['intelligence'] = {
            'feed': apply_classifier_feeds(classifier_feeds, feed),
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        }

    # Intelligence
    feed_tags = json.decode(feed.popular_tags) if feed.popular_tags else []
    feed_authors = json.decode(feed.popular_authors) if feed.popular_authors else []
    classifiers = get_classifiers_for_user(user, feed_id, classifier_feeds,
                                           classifier_authors, classifier_titles, classifier_tags)

    usersub.feed_opens += 1
    usersub.save()

    diff = datetime.datetime.now() - now
    # Milliseconds are zero-padded to three digits; without the padding a
    # 5 ms load would be logged as "0.5 seconds".
    logging.info(" ---> [%s] Loading feed: %s (%s.%03d seconds)" % (
        request.user, feed, diff.seconds, diff.microseconds // 1000))

    data = dict(stories=stories,
                feed_tags=feed_tags,
                feed_authors=feed_authors,
                classifiers=classifiers)
    return data
def load_single_feed(request, feed_id):
    """Return one page of a feed's stories, annotated for the requesting user.

    Request parameters (read from ``request.REQUEST``):
        offset: index of the first story; defaults to 0 and is overridden
            whenever ``page`` is non-zero.
        limit: number of stories per page; defaults to 12.
        page: 1-based page number; defaults to 1.
        feed_address: only consulted when no ``Feed`` has id ``feed_id``, to
            look the feed up through ``DuplicateFeed``.

    The total load time is logged and stored in ``FeedLoadtime``; loads that
    take one second or longer also log the intermediate checkpoints.

    Returns:
        A dict with ``stories``, ``feed_tags``, ``feed_authors``,
        ``classifiers``, ``last_update`` and ``feed_id``. ``dupe_feed_id`` is
        added when the feed was resolved through a duplicate address, and the
        result of ``feed.canonical()`` is merged in when the user has no
        subscription to the feed.

    Raises:
        Http404: if ``feed_id`` is empty, or the feed cannot be found either
            by id or by duplicate address.
    """
    start = time.time()
    user = get_user(request)
    offset = int(request.REQUEST.get('offset', 0))
    limit = int(request.REQUEST.get('limit', 12))
    page = int(request.REQUEST.get('page', 1))
    dupe_feed_id = None
    userstories_db = None

    if page:
        offset = limit * (page-1)
    if not feed_id:
        raise Http404

    try:
        feed = Feed.objects.get(id=feed_id)
    except Feed.DoesNotExist:
        # The id may belong to a feed that no longer exists under that id;
        # fall back to a lookup by its recorded duplicate address.
        feed_address = request.REQUEST.get('feed_address')
        dupe_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address)
        if dupe_feed:
            feed = dupe_feed[0].feed
            dupe_feed_id = feed_id
        else:
            raise Http404

    stories = feed.get_stories(offset, limit)

    # Get intelligence classifier for user
    classifier_feeds = list(MClassifierFeed.objects(user_id=user.pk, feed_id=feed_id))
    classifier_authors = list(MClassifierAuthor.objects(user_id=user.pk, feed_id=feed_id))
    classifier_titles = list(MClassifierTitle.objects(user_id=user.pk, feed_id=feed_id))
    classifier_tags = list(MClassifierTag.objects(user_id=user.pk, feed_id=feed_id))

    checkpoint1 = time.time()

    # A missing subscription is an expected case (handled by the
    # `if usersub` / `if not usersub` branches below), not an error.
    try:
        usersub = UserSubscription.objects.get(user=user, feed=feed)
    except UserSubscription.DoesNotExist:
        usersub = None

    userstories = set()  # ids/guids of stories the user has already read
    starred_stories = {}
    if usersub:
        userstories_db = MUserStory.objects(user_id=user.pk,
                                            feed_id=feed.pk,
                                            read_date__gte=usersub.mark_read_date)
        starred_stories = MStarredStory.objects(
            user_id=user.pk,
            story_feed_id=feed_id
        ).only('story_guid', 'starred_date')
        starred_stories = dict([(story.story_guid, story.starred_date)
                                for story in starred_stories])

        for us in userstories_db:
            if hasattr(us.story, 'story_guid') and isinstance(us.story.story_guid, unicode):
                userstories.add(us.story.story_guid)
            elif hasattr(us.story, 'id') and isinstance(us.story.id, unicode):
                userstories.add(us.story.id)  # TODO: Remove me after migration from story.id->guid

    checkpoint2 = time.time()

    for story in stories:
        story_date = localtime_for_timezone(story['story_date'], user.profile.timezone)
        now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone)
        story['short_parsed_date'] = format_story_link_date__short(story_date, now)
        story['long_parsed_date'] = format_story_link_date__long(story_date, now)
        if usersub:
            if story['id'] in userstories:
                story['read_status'] = 1
            elif not story.get('read_status') and story['story_date'] < usersub.mark_read_date:
                story['read_status'] = 1
            elif not story.get('read_status') and story['story_date'] > usersub.last_read_date:
                story['read_status'] = 0
            if story['id'] in starred_stories:
                story['starred'] = True
                starred_date = localtime_for_timezone(starred_stories[story['id']],
                                                      user.profile.timezone)
                story['starred_date'] = format_story_link_date__long(starred_date, now)
        else:
            # Without a subscription every story is reported as read.
            story['read_status'] = 1
        story['intelligence'] = {
            'feed': apply_classifier_feeds(classifier_feeds, feed),
            'author': apply_classifier_authors(classifier_authors, story),
            'tags': apply_classifier_tags(classifier_tags, story),
            'title': apply_classifier_titles(classifier_titles, story),
        }

    checkpoint3 = time.time()

    # Intelligence
    feed_tags = json.decode(feed.data.popular_tags) if feed.data.popular_tags else []
    feed_authors = json.decode(feed.data.popular_authors) if feed.data.popular_authors else []
    classifiers = get_classifiers_for_user(user, feed_id, classifier_feeds,
                                           classifier_authors, classifier_titles, classifier_tags)

    if usersub:
        usersub.feed_opens += 1
        usersub.save()

    timediff = time.time()-start
    last_update = relative_timesince(feed.last_update)
    logging.user(request.user, "~FYLoading feed: ~SB%s%s ~SN(%.4s seconds)" % (
        feed, ('~SN/p%s' % page) if page > 1 else '', timediff))
    FeedLoadtime.objects.create(feed=feed, loadtime=timediff)

    if timediff >= 1:
        # Break a slow load down by cumulative time at each checkpoint.
        diff1 = checkpoint1-start
        diff2 = checkpoint2-start
        diff3 = checkpoint3-start
        logging.user(request.user, "~FYSlow feed load: ~SB%.4s/%.4s(%s)/%.4s" % (
            diff1, diff2, userstories_db and userstories_db.count(), diff3))

    data = dict(stories=stories,
                feed_tags=feed_tags,
                feed_authors=feed_authors,
                classifiers=classifiers,
                last_update=last_update,
                feed_id=feed.pk)

    if dupe_feed_id:
        data['dupe_feed_id'] = dupe_feed_id
    if not usersub:
        data.update(feed.canonical())

    return data
feed_classifiers = defaultdict(list) for classifier in classifiers: feed_classifiers[classifier.feed_id].append(classifier) return feed_classifiers classifiers = {} try: classifier_feeds = sort_by_feed(MClassifierFeed.objects(user_id=user.pk, feed_id__in=found_feed_ids)) classifier_authors = sort_by_feed(MClassifierAuthor.objects(user_id=user.pk, feed_id__in=found_feed_ids)) classifier_titles = sort_by_feed(MClassifierTitle.objects(user_id=user.pk, feed_id__in=found_feed_ids)) classifier_tags = sort_by_feed(MClassifierTag.objects(user_id=user.pk, feed_id__in=found_feed_ids)) except OperationFailure: logging.info(" ***> Classifiers failure") else: for feed_id in found_feed_ids: classifiers[feed_id] = get_classifiers_for_user(user, feed_id, classifier_feeds[feed_id], classifier_authors[feed_id], classifier_titles[feed_id], classifier_tags[feed_id]) # Just need to format stories for story in stories: story_date = localtime_for_timezone(story['story_date'], user.profile.timezone) now = localtime_for_timezone(datetime.datetime.now(), user.profile.timezone) story['short_parsed_date'] = format_story_link_date__short(story_date, now) story['long_parsed_date'] = format_story_link_date__long(story_date, now) story['read_status'] = 0 if story['id'] in starred_stories: story['starred'] = True starred_date = localtime_for_timezone(starred_stories[story['id']], user.profile.timezone) story['starred_date'] = format_story_link_date__long(starred_date, now) story['intelligence'] = { 'feed': apply_classifier_feeds(classifier_feeds[story['story_feed_id']], story['story_feed_id']),