def activate_names_requested_in(link):
    # Scan the link's top-level comments for server-name requests and
    # activate up to link.revenue_bucket of them (highest-upvoted first),
    # then record the result on the link's flair.
    tree = get_comment_tree(link)
    acceptable_names = []
    if tree.tree:
        # None keys the forest of top-level comment ids in the tree dict
        top_level_cids = tree.tree[None]
        comments = chain.from_iterable(Comment._byID(chunk, return_dict=False,
                                                     data=True)
                                       for chunk in in_chunks(top_level_cids))

        # consider the most-upvoted requests first
        for comment in sorted(comments, key=lambda c: c._ups, reverse=True):
            if comment._spam or comment._deleted:
                continue

            sanitized = comment.body.strip()
            match = valid_name_re.search(sanitized)
            if match:
                acceptable_names.append((comment, match.group(1)))

    # we activate one name for each 100% of rev goal met
    names = acceptable_names[:link.revenue_bucket]
    activate_names(link, names)

    activated_names = [name for comment, name in names]
    link.server_names = activated_names
    # flair shows the active names, or a placeholder when none qualified
    link.flair_text = ", ".join(activated_names) if names else "/dev/null"
    link.flair_css_class = "goal-bucket-%d" % link.revenue_bucket
    link._commit()
def comment_reply_effect(comment):
    """Run the on_reply hook of every effect attached to the replied-to thing.

    The reply target is the parent comment when one exists, otherwise the
    link the comment was posted on.
    """
    if comment.parent_id is None:
        parent = Link._byID(comment.link_id, data=True)
    else:
        parent = Comment._byID(comment.parent_id, data=True)

    effects_by_fullname = effects.get_all_effects([parent._fullname])
    for effect_name in effects_by_fullname.get(parent._fullname, []):
        items.get_item(effect_name).on_reply(c.user, parent)
def fix_bare_links(apply=False):
    # Walk every comment by sequential id, rewriting bare links in imported
    # HTML bodies. Before/after bodies are logged to two files for review.
    # apply: when True, commit the rewritten bodies; otherwise dry-run.
    from r2.models import Comment
    from r2.lib.db.thing import NotFound

    fbefore = codecs.open('fix_bare_links_before.txt', 'w', 'utf-8')
    fafter = codecs.open('fix_bare_links_after.txt', 'w', 'utf-8')

    comment_id = 1
    try:
        # The comments are retrieved like this to prevent the API from
        # attempting to load all comments at once and then iterating over them
        while True:
            comment = Comment._byID(comment_id, data=True)
            # only imported, HTML-bodied comments are candidates
            if ((hasattr(comment, 'ob_imported') and comment.ob_imported) and
                    (hasattr(comment, 'is_html') and comment.is_html)):
                body = comment.body
                if isinstance(body, str):
                    # legacy byte strings must be decoded before rewriting
                    try:
                        body = body.decode('utf-8')
                    except UnicodeDecodeError:
                        print >> sys.stderr, "UnicodeDecodeError, using 'ignore' error mode, comment: %d" % comment._id
                        body = body.decode('utf-8', errors='ignore')
                new_content = rewrite_bare_links(body)
                if new_content != body:
                    print >> fbefore, body
                    print >> fafter, new_content
                    if apply:
                        comment.body = new_content
                        comment._commit()
                        try:
                            print >> sys.stderr, "Rewrote comment %s" % comment.make_permalink_slow().encode('utf-8')
                        except UnicodeError:
                            print >> sys.stderr, "Rewrote comment with id: %d" % comment._id
            comment_id += 1
    except NotFound:
        # Assumes that comment ids are sequential and never deleted
        # (which I believe to true) -- wjm
        print >> sys.stderr, "Comment %d not found, exiting" % comment_id
        return
    finally:
        # BUG FIX: the log files were never closed, so buffered output could
        # be lost on exit. Close them on every exit path.
        fbefore.close()
        fafter.close()
def fix_bare_links(apply=False):
    # Walk every comment by sequential id, rewriting bare links in imported
    # HTML bodies. Before/after bodies are logged to two files for review.
    # apply: when True, commit the rewritten bodies; otherwise dry-run.
    from r2.models import Comment
    from r2.lib.db.thing import NotFound

    fbefore = codecs.open('fix_bare_links_before.txt', 'w', 'utf-8')
    fafter = codecs.open('fix_bare_links_after.txt', 'w', 'utf-8')

    comment_id = 1
    try:
        # The comments are retrieved like this to prevent the API from
        # attempting to load all comments at once and then iterating over them
        while True:
            comment = Comment._byID(comment_id, data=True)
            # only imported, HTML-bodied comments are candidates
            if (hasattr(comment, 'ob_imported') and comment.ob_imported) and (hasattr(comment, 'is_html') and comment.is_html):
                body = comment.body
                if isinstance(body, str):
                    # legacy byte strings must be decoded before rewriting
                    try:
                        body = body.decode('utf-8')
                    except UnicodeDecodeError:
                        print >>sys.stderr, "UnicodeDecodeError, using 'ignore' error mode, comment: %d" % comment._id
                        body = body.decode('utf-8', errors='ignore')
                new_content = rewrite_bare_links(body)
                if new_content != body:
                    print >>fbefore, body
                    print >>fafter, new_content
                    if apply:
                        comment.body = new_content
                        comment._commit()
                        try:
                            print >>sys.stderr, "Rewrote comment %s" % comment.make_permalink_slow().encode('utf-8')
                        except UnicodeError:
                            print >>sys.stderr, "Rewrote comment with id: %d" % comment._id
            comment_id += 1
    except NotFound:
        # Assumes that comment ids are sequential and never deleted
        # (which I believe to true) -- wjm
        print >>sys.stderr, "Comment %d not found, exiting" % comment_id
        return
def comment_event(self, new_comment, request=None, context=None):
    """Create a 'comment' event for event-collector.

    new_comment: An r2.models.Comment object
    request, context: Should be pylons.request & pylons.c respectively
    """
    from r2.models import Comment, Link

    event = Event(
        topic="comment_events",
        event_type="ss.comment",
        time=new_comment._date,
        request=request,
        context=context,
        # comment_body is the field dropped first if the event is too large
        truncatable_field="comment_body",
    )

    event.add("comment_id", new_comment._id)
    event.add("comment_fullname", new_comment._fullname)
    event.add_text("comment_body", new_comment.body)

    post = Link._byID(new_comment.link_id)
    event.add("post_id", post._id)
    event.add("post_fullname", post._fullname)
    event.add("post_created_ts", to_epoch_milliseconds(post._date))
    if post.promoted:
        event.add("post_is_promoted", bool(post.promoted))

    if new_comment.parent_id:
        parent = Comment._byID(new_comment.parent_id)
    else:
        # If this is a top-level comment, parent is the same as the post
        parent = post
    event.add("parent_id", parent._id)
    event.add("parent_fullname", parent._fullname)
    event.add("parent_created_ts", to_epoch_milliseconds(parent._date))

    # "neutered" here reflects the author's spam flag
    event.add("user_neutered", new_comment.author_slow._spam)

    event.add_subreddit_fields(new_comment.subreddit_slow)

    self.save_event(event)
def on_use(self, user, target):
    # Apply this item's effect to the target comment, its direct replies,
    # and their replies, then deal per-tier damage to each level.
    link = Link._byID(target.link_id)
    comment_tree = get_comment_tree(link)
    # direct replies of the target comment
    # NOTE(review): assumes the tree dict has entries for target._id and
    # every child id — verify against get_comment_tree's contract
    child_ids = comment_tree.tree[target._id]
    grandchild_ids = []
    for child_id in child_ids:
        grandchild_ids.extend(comment_tree.tree[child_id])
    comments = Comment._byID(child_ids + grandchild_ids, data=True,
                             return_dict=True)
    children = [comments[cid] for cid in child_ids]
    grandchildren = [comments[cid] for cid in grandchild_ids]

    # mark the whole affected subtree with this item's effect
    for comment in itertools.chain([target], children, grandchildren):
        effects.add_effect(user, comment, self.item_name)

    # separate damage amounts for the target, its children, and grandchildren
    self.apply_damage_and_log(user, [target], self.direct_damage)
    self.apply_damage_and_log(user, children, self.child_damage)
    self.apply_damage_and_log(user, grandchildren, self.grandchild_damage)
def _populate(after_id=None, estimate=54301242):
    """Backfill comment vote/sort data into Cassandra, newest comments first.

    after_id: resume the scan after this comment id, if given.
    estimate: expected total row count, used only for progress reporting.
    """
    from r2.models import Comment, CommentSortsCache, desc
    from r2.lib.db import tdb_cassandra
    from r2.lib import utils

    # larger has a chance to decrease the number of Cassandra writes,
    # but the probability is low
    batch_size = 5000

    query = Comment._query(Comment.c._spam == (True, False),
                           Comment.c._deleted == (True, False),
                           sort=desc("_date"))
    if after_id is not None:
        query._after(Comment._byID(after_id))

    things = utils.fetch_things2(query, chunk_size=batch_size)
    things = utils.progress(things, verbosity=batch_size, estimate=estimate)

    for batch in utils.in_chunks(things, batch_size):
        # comments missing their link_id attribute cannot be updated
        usable = [thing for thing in batch if hasattr(thing, "link_id")]
        update_comment_votes(usable,
                             write_consistency_level=tdb_cassandra.CL.ONE)
def _populate(after_id = None, estimate=54301242):
    # Backfill comment vote/sort data into Cassandra, newest comments first.
    # after_id: resume the scan after this comment id, if given.
    # estimate: expected total row count, used only for progress reporting.
    from r2.models import Comment, CommentSortsCache, desc
    from r2.lib.db import tdb_cassandra
    from r2.lib import utils

    # larger has a chance to decrease the number of Cassandra writes,
    # but the probability is low
    chunk_size = 5000

    q = Comment._query(Comment.c._spam==(True,False),
                       Comment.c._deleted==(True,False),
                       sort=desc('_date'))
    if after_id is not None:
        q._after(Comment._byID(after_id))
    q = utils.fetch_things2(q, chunk_size=chunk_size)
    q = utils.progress(q, verbosity=chunk_size, estimate = estimate)
    for chunk in utils.in_chunks(q, chunk_size):
        # comments missing their link_id attribute cannot be updated
        chunk = filter(lambda x: hasattr(x, 'link_id'), chunk)
        update_comment_votes(chunk, write_consistency_level = tdb_cassandra.CL.ONE)
def link_comments_and_sort(link_id, sort):
    # Load the full cached comment-tree state for a link plus the sorter for
    # the requested sort, repairing inconsistent cache entries as needed.
    # Returns (cids, cid_tree, depth, num_children, parents, sorter).
    from r2.models import Comment, CommentSortsCache

    # This has grown sort of organically over time. Right now the
    # cache of the comments tree consists in three keys:
    # 1. The comments_key: A tuple of
    #      (cids, comment_tree, depth, num_children)
    #    given:
    #      cids =:= [comment_id]
    #      comment_tree =:= dict(comment_id -> [comment_id])
    #      depth =:= dict(comment_id -> int depth)
    #      num_children =:= dict(comment_id -> int num_children)
    # 2. The parent_comments_key =:= dict(comment_id -> parent_id)
    # 3. The comments_sorts keys =:= dict(comment_id36 -> float).
    #    These are represented by a Cassandra model
    #    (CommentSortsCache) rather than a permacache key. One of
    #    these exists for each sort (hot, new, etc)

    # performance hack: preload these into the LocalCache at the same
    # time
    g.permacache.get_multi([comments_key(link_id),
                            parent_comments_key(link_id)])

    cids, cid_tree, depth, num_children = link_comments(link_id)

    # load the sorter
    sorter = _get_comment_sorter(link_id, sort)

    sorter_needed = []
    if cids and not sorter:
        # complete sorter cache miss: every cid needs a sort value
        sorter_needed = cids
        g.log.debug("comment_tree.py: sorter (%s) cache miss for Link %s"
                    % (sort, link_id))
        sorter = {}

    # any cids still missing from the sorter must be recomputed
    sorter_needed = [x for x in cids if x not in sorter]
    if cids and sorter_needed:
        g.log.debug("Error in comment_tree: sorter %r inconsistent (missing %d e.g. %r)" % (sort_comments_key(link_id, sort), len(sorter_needed), sorter_needed[:10]))
        if not g.disallow_db_writes:
            # repair the cache while we're here (skipped on read-only apps)
            update_comment_votes(Comment._byID(sorter_needed, data=True,
                                               return_dict=False))

        sorter.update(_comment_sorter_from_cids(sorter_needed, sort))

    # load the parents
    key = parent_comments_key(link_id)
    parents = g.permacache.get(key)
    if parents is None:
        g.log.debug("comment_tree.py: parents cache miss for Link %s"
                    % link_id)
        parents = {}
    elif cids and not all(x in parents for x in cids):
        # parents dict is stale/incomplete; throw it away and rebuild
        g.log.debug("Error in comment_tree: parents inconsistent for Link %s"
                    % link_id)
        parents = {}

    if not parents:
        with g.make_lock(lock_key(link_id)):
            # reload from the cache so the sorter and parents are
            # maximally consistent
            r = g.permacache.get(comments_key(link_id))
            cids, cid_tree, depth, num_children = r

            key = parent_comments_key(link_id)
            if not parents:
                parents = _parent_dict_from_tree(cid_tree)
                g.permacache.set(key, parents)

    return cids, cid_tree, depth, num_children, parents, sorter
def _comment_sorter_from_cids(cids, sort):
    """Return a dict mapping comment id -> sort value for the given ids."""
    from r2.models import Comment
    loaded = Comment._byID(cids, data=False, return_dict=False)
    return {comment._id: _get_sort_value(comment, sort) for comment in loaded}
def get_items(self):
    """Build the wrapped, nested comment listing for this CommentBuilder.

    Picks up to self.num comments by sort order, wraps them, applies
    Q&A-sort hiding rules, nests children under parents, and attaches
    MoreChildren/MoreRecursion stubs for anything not shown.
    """
    timer = g.stats.get_timer("CommentBuilder.get_items")
    timer.start()
    r = link_comments_and_sort(self.link, self.sort.col)
    cids, cid_tree, depth, parents, sorter = r
    timer.intermediate("load_storage")

    if self.comment and not self.comment._id in depth:
        g.log.error("Hack - self.comment (%d) not in depth. Defocusing..."
                    % self.comment._id)
        self.comment = None

    more_recursions = {}
    dont_collapse = []
    candidates = []
    offset_depth = 0

    if self.children:
        # requested specific child comments
        children = [cid for cid in self.children if cid in cids]
        self.update_candidates(candidates, sorter, children)
        dont_collapse.extend(comment for sort_val, comment in candidates)
    elif self.comment:
        # requested the tree from a specific comment

        # construct path back to top level from this comment, a maximum of
        # `context` levels
        comment = self.comment._id
        path = []
        while comment and len(path) <= self.context:
            path.append(comment)
            comment = parents[comment]

        dont_collapse.extend(path)

        # rewrite cid_tree so the parents lead only to the requested comment
        for comment in path:
            parent = parents[comment]
            cid_tree[parent] = [comment]

        # start building comment tree from earliest comment
        self.update_candidates(candidates, sorter, path[-1])

        # set offset_depth because we may not be at the top level and can
        # show deeper levels
        offset_depth = depth.get(path[-1], 0)
    else:
        # full tree requested, start with the top level comments
        top_level_comments = cid_tree.get(None, ())
        self.update_candidates(candidates, sorter, top_level_comments)

    timer.intermediate("pick_candidates")

    if not candidates:
        timer.stop()
        return []

    # choose which comments to show
    items = []
    while (self.num is None or len(items) < self.num) and candidates:
        sort_val, comment_id = heapq.heappop(candidates)
        if comment_id not in cids:
            continue

        comment_depth = depth[comment_id] - offset_depth
        if comment_depth < self.max_depth:
            items.append(comment_id)

            # add children
            if comment_id in cid_tree:
                children = cid_tree[comment_id]
                self.update_candidates(candidates, sorter, children)
        elif (self.continue_this_thread and
              parents.get(comment_id) is not None):
            # the comment is too deep to add, so add a MoreRecursion for
            # its parent
            parent_id = parents[comment_id]
            if parent_id not in more_recursions:
                w = Wrapped(MoreRecursion(self.link, depth=0,
                                          parent_id=parent_id))
            else:
                w = more_recursions[parent_id]
            w.children.append(comment_id)
            more_recursions[parent_id] = w

    timer.intermediate("pick_comments")

    # retrieve num_children for the visible comments
    top_level_candidates = [comment for sort_val, comment in candidates
                            if depth.get(comment, 0) == 0]
    needs_num_children = items + top_level_candidates
    num_children = get_num_children(needs_num_children, cid_tree)
    timer.intermediate("calc_num_children")

    comments = Comment._byID(items, data=True, return_dict=False,
                             stale=self.stale)
    timer.intermediate("lookup_comments")
    wrapped = self.wrap_items(comments)
    timer.intermediate("wrap_comments")
    wrapped_by_id = {comment._id: comment for comment in wrapped}
    final = []

    # We have some special collapsing rules for the Q&A sort type.
    # However, we want to show everything when we're building a specific
    # set of children (like from "load more" links) or when viewing a
    # comment permalink.
    qa_sort_hiding = ((self.sort.col == '_qa') and not self.children and
                      self.comment is None)
    if qa_sort_hiding:
        special_responder_ids = self.link.responder_ids
    else:
        special_responder_ids = ()

    max_relation_walks = g.max_comment_parent_walk
    for comment in wrapped:
        # skip deleted comments with no children
        if (comment.deleted and not cid_tree.has_key(comment._id)
                and not self.show_deleted):
            comment.hidden_completely = True
            continue

        comment.num_children = num_children[comment._id]
        comment.edits_visible = self.edits_visible

        # In the Q&A sort type, we want to collapse all comments other than
        # those that are:
        #
        # 1. Top-level comments,
        # 2. Responses from the OP(s),
        # 3. Responded to by the OP(s) (dealt with below), or
        # 4. Otherwise normally prevented from collapse (eg distinguished
        #    comments).
        if (qa_sort_hiding and
                depth[comment._id] != 0 and  # (1)
                comment.author_id not in special_responder_ids and  # (2)
                not comment.prevent_collapse):  # (4)
            comment.hidden = True

        if comment.collapsed and comment._id in dont_collapse:
            comment.collapsed = False
            comment.hidden = False

        parent = wrapped_by_id.get(comment.parent_id)
        if parent:
            if (qa_sort_hiding and
                    comment.author_id in special_responder_ids):
                # Un-collapse parents as necessary. It's a lot easier to
                # do this here, upwards, than to check through all the
                # children when we were iterating at the parent.
                ancestor = parent
                counter = 0
                while (ancestor and
                        not getattr(ancestor, 'walked', False) and
                        counter < max_relation_walks):
                    ancestor.hidden = False
                    # In case we haven't processed this comment yet.
                    ancestor.prevent_collapse = True
                    # This allows us to short-circuit when the rest of the
                    # tree has already been uncollapsed.
                    ancestor.walked = True

                    ancestor = wrapped_by_id.get(ancestor.parent_id)
                    counter += 1

    # One more time through to actually add things to the final list. We
    # couldn't do that the first time because in the Q&A sort we don't know
    # if a comment should be visible until after we've processed all its
    # children.
    for comment in wrapped:
        if getattr(comment, 'hidden_completely', False):
            # Don't add it to the tree, don't put it in "load more", don't
            # acknowledge its existence at all.
            continue

        if getattr(comment, 'hidden', False):
            # Remove it from the list of visible comments so it'll
            # automatically be a candidate for the "load more" links.
            del wrapped_by_id[comment._id]
            # And don't add it to the tree.
            continue

        # add the comment as a child of its parent or to the top level of
        # the tree if it has no parent
        parent = wrapped_by_id.get(comment.parent_id)
        if parent:
            if not hasattr(parent, 'child'):
                add_child_listing(parent, comment)
            else:
                parent.child.things.append(comment)
        else:
            final.append(comment)

    for parent_id, more_recursion in more_recursions.iteritems():
        if parent_id not in wrapped_by_id:
            continue

        parent = wrapped_by_id[parent_id]
        add_child_listing(parent, more_recursion)

    timer.intermediate("build_comments")

    if not self.load_more:
        timer.stop()
        return final

    # build MoreChildren for visible comments
    visible_comments = wrapped_by_id.keys()
    for visible_id in visible_comments:
        if visible_id in more_recursions:
            # don't add a MoreChildren if we already have a MoreRecursion
            continue

        children = cid_tree.get(visible_id, ())
        missing_children = [child for child in children
                            if child not in visible_comments]

        if missing_children:
            visible_children = (child for child in children
                                if child in visible_comments)
            visible_count = sum(1 + num_children[child]
                                for child in visible_children)
            missing_count = num_children[visible_id] - visible_count
            missing_depth = depth.get(visible_id, 0) + 1 - offset_depth

            if missing_depth < self.max_depth:
                mc = MoreChildren(self.link, self.sort, depth=missing_depth,
                                  parent_id=visible_id)
                mc.children.extend(missing_children)
                w = Wrapped(mc)
                w.count = missing_count
            else:
                mr = MoreRecursion(self.link, depth=missing_depth,
                                   parent_id=visible_id)
                w = Wrapped(mr)

            # attach the MoreChildren
            parent = wrapped_by_id[visible_id]
            if hasattr(parent, 'child'):
                parent.child.things.append(w)
            else:
                add_child_listing(parent, w)

    # build MoreChildren for missing root level comments
    if top_level_candidates:
        mc = MoreChildren(self.link, self.sort, depth=0, parent_id=None)
        mc.children.extend(top_level_candidates)
        w = Wrapped(mc)
        w.count = sum(1 + num_children[comment]
                      for comment in top_level_candidates)
        final.append(w)

    if isinstance(self.sort, operators.shuffled):
        shuffle(final)

    timer.intermediate("build_morechildren")
    timer.stop()
    return final
def get_items(self):
    """Build the wrapped, nested comment listing for this CommentBuilder.

    Older variant: no Q&A hiding rules; child listings are created inline
    via empty_listing rather than add_child_listing.
    """
    timer = g.stats.get_timer("CommentBuilder.get_items")
    timer.start()
    r = link_comments_and_sort(self.link, self.sort.col)
    cids, cid_tree, depth, parents, sorter = r
    timer.intermediate("load_storage")

    if self.comment and not self.comment._id in depth:
        g.log.error("Hack - self.comment (%d) not in depth. Defocusing..."
                    % self.comment._id)
        self.comment = None

    more_recursions = {}
    dont_collapse = []
    candidates = []
    offset_depth = 0

    if self.children:
        # requested specific child comments
        # NOTE(review): here self.children appears to hold Comment objects
        # (child._id), unlike the sibling variant that holds bare ids
        children = [child._id for child in self.children
                    if child._id in cids]
        self.update_candidates(candidates, sorter, children)
        dont_collapse.extend(comment for sort_val, comment in candidates)
    elif self.comment:
        # requested the tree from a specific comment

        # construct path back to top level from this comment, a maximum of
        # `context` levels
        comment = self.comment._id
        path = []
        while comment and len(path) <= self.context:
            path.append(comment)
            comment = parents[comment]

        dont_collapse.extend(path)

        # rewrite cid_tree so the parents lead only to the requested comment
        for comment in path:
            parent = parents[comment]
            cid_tree[parent] = [comment]

        # start building comment tree from earliest comment
        self.update_candidates(candidates, sorter, path[-1])

        # set offset_depth because we may not be at the top level and can
        # show deeper levels
        offset_depth = depth.get(path[-1], 0)
    else:
        # full tree requested, start with the top level comments
        top_level_comments = cid_tree.get(None, ())
        self.update_candidates(candidates, sorter, top_level_comments)

    timer.intermediate("pick_candidates")

    if not candidates:
        timer.stop()
        return []

    # choose which comments to show
    items = []
    while (self.num is None or len(items) < self.num) and candidates:
        sort_val, comment_id = heapq.heappop(candidates)
        if comment_id not in cids:
            continue

        comment_depth = depth[comment_id] - offset_depth
        if comment_depth < self.max_depth:
            items.append(comment_id)

            # add children
            if comment_id in cid_tree:
                children = cid_tree[comment_id]
                self.update_candidates(candidates, sorter, children)
        elif (self.continue_this_thread and
              parents.get(comment_id) is not None):
            # the comment is too deep to add, so add a MoreRecursion for
            # its parent
            parent_id = parents[comment_id]
            if parent_id not in more_recursions:
                w = Wrapped(MoreRecursion(self.link, depth=0,
                                          parent_id=parent_id))
            else:
                w = more_recursions[parent_id]
            w.children.append(comment_id)
            more_recursions[parent_id] = w

    timer.intermediate("pick_comments")

    # retrieve num_children for the visible comments
    top_level_candidates = [comment for sort_val, comment in candidates
                            if depth.get(comment, 0) == 0]
    needs_num_children = items + top_level_candidates
    num_children = get_num_children(needs_num_children, cid_tree)
    timer.intermediate("calc_num_children")

    comments = Comment._byID(items, data=True, return_dict=False,
                             stale=self.stale)
    timer.intermediate("lookup_comments")
    wrapped = self.wrap_items(comments)
    timer.intermediate("wrap_comments")
    wrapped_by_id = {comment._id: comment for comment in wrapped}
    final = []

    for comment in wrapped:
        # skip deleted comments with no children
        if (comment.deleted and not cid_tree.has_key(comment._id)
                and not c.user_is_admin):
            continue

        comment.num_children = num_children[comment._id]

        if comment.collapsed and comment._id in dont_collapse:
            comment.collapsed = False

        # add the comment as a child of its parent or to the top level of
        # the tree if it has no parent
        parent = wrapped_by_id.get(comment.parent_id)
        if parent:
            if not hasattr(parent, 'child'):
                parent.child = empty_listing()
                if not parent.deleted:
                    parent.child.parent_name = parent._fullname
            parent.child.things.append(comment)
        else:
            final.append(comment)

    for parent_id, more_recursion in more_recursions.iteritems():
        if parent_id not in wrapped_by_id:
            continue

        parent = wrapped_by_id[parent_id]
        parent.child = empty_listing(more_recursion)
        if not parent.deleted:
            parent.child.parent_name = parent._fullname

    timer.intermediate("build_comments")

    if not self.load_more:
        timer.stop()
        return final

    # build MoreChildren for visible comments
    visible_comments = wrapped_by_id.keys()
    for visible_id in visible_comments:
        if visible_id in more_recursions:
            # don't add a MoreChildren if we already have a MoreRecursion
            continue

        children = cid_tree.get(visible_id, ())
        missing_children = [child for child in children
                            if child not in visible_comments]

        if missing_children:
            visible_children = (child for child in children
                                if child in visible_comments)
            visible_count = sum(1 + num_children[child]
                                for child in visible_children)
            missing_count = num_children[visible_id] - visible_count
            missing_depth = depth.get(visible_id, 0) + 1 - offset_depth
            mc = MoreChildren(self.link, depth=missing_depth,
                              parent_id=visible_id)
            mc.children.extend(missing_children)
            w = Wrapped(mc)
            w.count = missing_count

            # attach the MoreChildren
            parent = wrapped_by_id[visible_id]
            if hasattr(parent, 'child'):
                parent.child.things.append(w)
            else:
                parent.child = empty_listing(w)
                if not parent.deleted:
                    parent.child.parent_name = parent._fullname

    # build MoreChildren for missing root level comments
    if top_level_candidates:
        mc = MoreChildren(self.link, depth=0, parent_id=None)
        mc.children.extend(top_level_candidates)
        w = Wrapped(mc)
        w.count = sum(1 + num_children[comment]
                      for comment in top_level_candidates)
        final.append(w)

    if isinstance(self.sort, operators.shuffled):
        shuffle(final)

    timer.intermediate("build_morechildren")
    timer.stop()
    return final
def _handle_sort(msgs, chan): cids = list(set(int(msg.body) for msg in msgs)) comments = Comment._byID(cids, data = True, return_dict = False) print comments update_comment_votes(comments)
def get_items(self):
    """Select, wrap, and nest the comments to display for this builder.

    Same Q&A-aware variant as the other copy in this file: picks up to
    self.num comments, applies Q&A hiding, nests children, and emits
    MoreChildren/MoreRecursion stubs for hidden or missing subtrees.
    """
    timer = g.stats.get_timer("CommentBuilder.get_items")
    timer.start()
    r = link_comments_and_sort(self.link, self.sort.col)
    cids, cid_tree, depth, parents, sorter = r
    timer.intermediate("load_storage")

    if self.comment and not self.comment._id in depth:
        g.log.error("Hack - self.comment (%d) not in depth. Defocusing..."
                    % self.comment._id)
        self.comment = None

    more_recursions = {}
    dont_collapse = []
    candidates = []
    offset_depth = 0

    if self.children:
        # requested specific child comments
        children = [cid for cid in self.children if cid in cids]
        self.update_candidates(candidates, sorter, children)
        dont_collapse.extend(comment for sort_val, comment in candidates)
    elif self.comment:
        # requested the tree from a specific comment

        # construct path back to top level from this comment, a maximum of
        # `context` levels
        comment = self.comment._id
        path = []
        while comment and len(path) <= self.context:
            path.append(comment)
            comment = parents[comment]

        dont_collapse.extend(path)

        # rewrite cid_tree so the parents lead only to the requested comment
        for comment in path:
            parent = parents[comment]
            cid_tree[parent] = [comment]

        # start building comment tree from earliest comment
        self.update_candidates(candidates, sorter, path[-1])

        # set offset_depth because we may not be at the top level and can
        # show deeper levels
        offset_depth = depth.get(path[-1], 0)
    else:
        # full tree requested, start with the top level comments
        top_level_comments = cid_tree.get(None, ())
        self.update_candidates(candidates, sorter, top_level_comments)

    timer.intermediate("pick_candidates")

    if not candidates:
        timer.stop()
        return []

    # choose which comments to show
    items = []
    while (self.num is None or len(items) < self.num) and candidates:
        sort_val, comment_id = heapq.heappop(candidates)
        if comment_id not in cids:
            continue

        comment_depth = depth[comment_id] - offset_depth
        if comment_depth < self.max_depth:
            items.append(comment_id)

            # add children
            if comment_id in cid_tree:
                children = cid_tree[comment_id]
                self.update_candidates(candidates, sorter, children)
        elif (self.continue_this_thread and
              parents.get(comment_id) is not None):
            # the comment is too deep to add, so add a MoreRecursion for
            # its parent
            parent_id = parents[comment_id]
            if parent_id not in more_recursions:
                w = Wrapped(MoreRecursion(self.link, depth=0,
                                          parent_id=parent_id))
            else:
                w = more_recursions[parent_id]
            w.children.append(comment_id)
            more_recursions[parent_id] = w

    timer.intermediate("pick_comments")

    # retrieve num_children for the visible comments
    top_level_candidates = [comment for sort_val, comment in candidates
                            if depth.get(comment, 0) == 0]
    needs_num_children = items + top_level_candidates
    num_children = get_num_children(needs_num_children, cid_tree)
    timer.intermediate("calc_num_children")

    comments = Comment._byID(items, data=True, return_dict=False,
                             stale=self.stale)
    timer.intermediate("lookup_comments")
    wrapped = self.wrap_items(comments)
    timer.intermediate("wrap_comments")
    wrapped_by_id = {comment._id: comment for comment in wrapped}
    final = []

    # We have some special collapsing rules for the Q&A sort type.
    # However, we want to show everything when we're building a specific
    # set of children (like from "load more" links) or when viewing a
    # comment permalink.
    qa_sort_hiding = ((self.sort.col == '_qa') and not self.children and
                      self.comment is None)
    if qa_sort_hiding:
        special_responder_ids = self.link.responder_ids
    else:
        special_responder_ids = ()

    max_relation_walks = g.max_comment_parent_walk
    for comment in wrapped:
        # skip deleted comments with no children
        if (comment.deleted and not cid_tree.has_key(comment._id)
                and not self.show_deleted):
            comment.hidden_completely = True
            continue

        comment.num_children = num_children[comment._id]
        comment.edits_visible = self.edits_visible

        # In the Q&A sort type, we want to collapse all comments other than
        # those that are:
        #
        # 1. Top-level comments,
        # 2. Responses from the OP(s),
        # 3. Responded to by the OP(s) (dealt with below), or
        # 4. Otherwise normally prevented from collapse (eg distinguished
        #    comments).
        if (qa_sort_hiding and
                depth[comment._id] != 0 and  # (1)
                comment.author_id not in special_responder_ids and  # (2)
                not comment.prevent_collapse):  # (4)
            comment.hidden = True

        if comment.collapsed and comment._id in dont_collapse:
            comment.collapsed = False
            comment.hidden = False

        parent = wrapped_by_id.get(comment.parent_id)
        if parent:
            if (qa_sort_hiding and
                    comment.author_id in special_responder_ids):
                # Un-collapse parents as necessary. It's a lot easier to
                # do this here, upwards, than to check through all the
                # children when we were iterating at the parent.
                ancestor = parent
                counter = 0
                while (ancestor and
                        not getattr(ancestor, 'walked', False) and
                        counter < max_relation_walks):
                    ancestor.hidden = False
                    # In case we haven't processed this comment yet.
                    ancestor.prevent_collapse = True
                    # This allows us to short-circuit when the rest of the
                    # tree has already been uncollapsed.
                    ancestor.walked = True

                    ancestor = wrapped_by_id.get(ancestor.parent_id)
                    counter += 1

    # One more time through to actually add things to the final list. We
    # couldn't do that the first time because in the Q&A sort we don't know
    # if a comment should be visible until after we've processed all its
    # children.
    for comment in wrapped:
        if getattr(comment, 'hidden_completely', False):
            # Don't add it to the tree, don't put it in "load more", don't
            # acknowledge its existence at all.
            continue

        if getattr(comment, 'hidden', False):
            # Remove it from the list of visible comments so it'll
            # automatically be a candidate for the "load more" links.
            del wrapped_by_id[comment._id]
            # And don't add it to the tree.
            continue

        # add the comment as a child of its parent or to the top level of
        # the tree if it has no parent
        parent = wrapped_by_id.get(comment.parent_id)
        if parent:
            if not hasattr(parent, 'child'):
                add_child_listing(parent, comment)
            else:
                parent.child.things.append(comment)
        else:
            final.append(comment)

    for parent_id, more_recursion in more_recursions.iteritems():
        if parent_id not in wrapped_by_id:
            continue

        parent = wrapped_by_id[parent_id]
        add_child_listing(parent, more_recursion)

    timer.intermediate("build_comments")

    if not self.load_more:
        timer.stop()
        return final

    # build MoreChildren for visible comments
    visible_comments = wrapped_by_id.keys()
    for visible_id in visible_comments:
        if visible_id in more_recursions:
            # don't add a MoreChildren if we already have a MoreRecursion
            continue

        children = cid_tree.get(visible_id, ())
        missing_children = [child for child in children
                            if child not in visible_comments]

        if missing_children:
            visible_children = (child for child in children
                                if child in visible_comments)
            visible_count = sum(1 + num_children[child]
                                for child in visible_children)
            missing_count = num_children[visible_id] - visible_count
            missing_depth = depth.get(visible_id, 0) + 1 - offset_depth

            if missing_depth < self.max_depth:
                mc = MoreChildren(self.link, self.sort, depth=missing_depth,
                                  parent_id=visible_id)
                mc.children.extend(missing_children)
                w = Wrapped(mc)
                w.count = missing_count
            else:
                mr = MoreRecursion(self.link, depth=missing_depth,
                                   parent_id=visible_id)
                w = Wrapped(mr)

            # attach the MoreChildren
            parent = wrapped_by_id[visible_id]
            if hasattr(parent, 'child'):
                parent.child.things.append(w)
            else:
                add_child_listing(parent, w)

    # build MoreChildren for missing root level comments
    if top_level_candidates:
        mc = MoreChildren(self.link, self.sort, depth=0, parent_id=None)
        mc.children.extend(top_level_candidates)
        w = Wrapped(mc)
        w.count = sum(1 + num_children[comment]
                      for comment in top_level_candidates)
        final.append(w)

    if isinstance(self.sort, operators.shuffled):
        shuffle(final)

    timer.intermediate("build_morechildren")
    timer.stop()
    return final
def message_notification_email(data):
    """Queues a system email for a new message notification.

    `data` is a batch of queued notification payloads; it is de-duplicated
    per-user, rate-limited, batched (up to MAX_MESSAGES_PER_BATCH previewed
    messages per email), rendered, and handed to g.email_provider.

    Side effects: marks each emailed inbox relation (`inbox_rel.emailed`),
    records the per-user rate limit, and emits stats/data-pipeline events.
    """
    from r2.lib.pages import MessageNotificationEmail

    timer_start = time.time()

    MAX_EMAILS_PER_USER = 30
    MAX_MESSAGES_PER_BATCH = 5
    total_messages_sent = 0
    inbox_item_lookup_count = 0

    unique_user_list = make_message_dict_unique(data)
    g.log.info(
        "there are %s users for this batch of emails" % len(unique_user_list))

    for datum in unique_user_list.itervalues():
        user = Account._byID36(datum['to'], data=True)
        g.log.info('user fullname: %s' % user._fullname)

        # In case a user has enabled the preference while it was enabled for
        # them, but we've since turned it off. We need to explicitly state the
        # user because we're not in the context of an HTTP request from them.
        if not feature.is_enabled('orangereds_as_emails', user=user):
            g.log.info('feature not enabled for user: %s' % user._fullname)
            continue

        # Don't send more than MAX_EMAILS_PER_USER per user per day
        user_notification_ratelimit = SimpleRateLimit(
            name="email_message_notification_%s" % user._id36,
            seconds=int(datetime.timedelta(days=1).total_seconds()),
            limit=MAX_EMAILS_PER_USER,
        )
        if not user_notification_ratelimit.check():
            g.log.info('message blocked at user_notification_ratelimit: %s' %
                       user_notification_ratelimit)
            continue

        # Get all new messages that haven't been emailed
        inbox_items = get_unread_and_unemailed(user)
        inbox_item_lookup_count += 1
        if not inbox_items:
            g.log.info('no inbox items found for %s' % user._fullname)
            continue

        newest_inbox_rel = inbox_items[-1][0]
        oldest_inbox_rel = inbox_items[0][0]

        now = datetime.datetime.now(g.tz)
        start_date = datetime.datetime.strptime(
            datum['start_date'], "%Y-%m-%d %H:%M:%S").replace(tzinfo=g.tz)

        # If messages are still being queued within the cooling period or
        # messages have been queued past the max delay, then keep waiting
        # a little longer to batch all of the messages up
        if (start_date != newest_inbox_rel._date and
                now < newest_inbox_rel._date +
                NOTIFICATION_EMAIL_COOLING_PERIOD and
                now < oldest_inbox_rel._date + NOTIFICATION_EMAIL_MAX_DELAY):
            g.log.info('messages still being batched for: %s' %
                       user._fullname)
            continue

        messages = []
        message_count = 0
        more_unread_messages = False
        non_preview_usernames = set()

        # Batch messages to email starting with older messages
        for inbox_rel, message in inbox_items:
            # Get sender_name, replacing with display_author if it exists
            g.log.info('user fullname: %s, message fullname: %s' % (
                user._fullname, message._fullname))
            sender_name = get_sender_name(message)

            if message_count >= MAX_MESSAGES_PER_BATCH:
                # prevent duplicate usernames for template display
                non_preview_usernames.add(sender_name)
                more_unread_messages = True
            else:
                link = None
                parent = None
                if isinstance(message, Comment):
                    permalink = message.make_permalink_slow(
                        context=1, force_domain=True)
                    if message.parent_id:
                        parent = Comment._byID(message.parent_id, data=True)
                    else:
                        link = Link._byID(message.link_id, data=True)
                else:
                    permalink = message.make_permalink(force_domain=True)

                message_type = get_message_type(message, parent, user, link)

                messages.append({
                    "author_name": sender_name,
                    "message_type": message_type,
                    "body": message.body,
                    "date": long_datetime(message._date),
                    "permalink": permalink,
                    "id": message._id,
                    "fullname": message._fullname,
                    "subject": getattr(message, 'subject', ''),
                })

            # Mark every inbox item as emailed, even the ones past the
            # preview cutoff (they are summarized, not re-sent later).
            inbox_rel.emailed = True
            inbox_rel._commit()
            message_count += 1

        mac = generate_notification_email_unsubscribe_token(
            datum['to'], user_email=user.email,
            user_password_hash=user.password)
        base = g.https_endpoint or g.origin
        unsubscribe_link = base + '/mail/unsubscribe/%s/%s' % (datum['to'],
                                                               mac)
        inbox_url = base + '/message/inbox'

        # unique email_hash for emails, to be used in utm tags
        # FIX: str() must wrap each id, not the whole generator; previously
        # this hashed the repr of a generator object instead of the ids.
        id_str = ''.join(str(message['id']) for message in messages)
        email_hash = hashlib.sha1(id_str).hexdigest()

        base_utm_query = {
            'utm_name': email_hash,
            'utm_source': 'email',
            'utm_medium': 'message_notification',
        }

        non_preview_usernames_str = generate_non_preview_usernames_str(
            non_preview_usernames)

        templateData = {
            'messages': messages,
            'unsubscribe_link': unsubscribe_link,
            'more_unread_messages': more_unread_messages,
            'message_count': message_count,
            'max_message_display_count': MAX_MESSAGES_PER_BATCH,
            'non_preview_usernames_str': non_preview_usernames_str,
            'base_url': base,
            'base_utm_query': base_utm_query,
            'inbox_url': inbox_url,
        }
        custom_headers = {
            'List-Unsubscribe': "<%s>" % unsubscribe_link
        }
        g.log.info('sending message for user: %s' % user._fullname)
        g.email_provider.send_email(
            to_address=user.email,
            from_address="Reddit <%s>" % g.notification_email,
            subject=Email.subjects[Email.Kind.MESSAGE_NOTIFICATION],
            text=MessageNotificationEmail(**templateData).render(
                style='email'),
            html=MessageNotificationEmail(**templateData).render(
                style='html'),
            custom_headers=custom_headers,
            email_type='message_notification_email',
        )
        total_messages_sent += 1

        # report the email event to data pipeline
        g.events.orangered_email_event(
            request=request,
            context=c,
            user=user,
            messages=messages,
            email_hash=email_hash,
            reply_count=message_count,
            newest_reply_age=newest_inbox_rel._date,
            oldest_reply_age=oldest_inbox_rel._date,
        )

        g.stats.simple_event('email.message_notification.queued')
        user_notification_ratelimit.record_usage()

    timer_end = time.time()
    g.log.info("Took %s seconds to send orangered emails" %
               (timer_end - timer_start))
    g.log.info("Total number of messages sent: %s" % total_messages_sent)
    g.log.info("Total count of inbox lookups: %s" % inbox_item_lookup_count)
def link_comments_and_sort(link_id, sort):
    """Load the cached comment-tree state for a link plus a sort mapping.

    Returns a 6-tuple:
        (cids, cid_tree, depth, num_children, parents, sorter)
    where `sorter` maps comment_id -> sort value for the requested `sort`,
    and `parents` maps comment_id -> parent comment_id (None for top level).
    May write to the DB (vote/sort backfill) and permacache on cache misses.
    """
    from r2.models import Comment, CommentSortsCache
    # This has grown sort of organically over time. Right now the
    # cache of the comments tree consists in three keys:
    # 1. The comments_key: A tuple of
    #      (cids, comment_tree, depth, num_children)
    #    given:
    #      cids =:= [comment_id]
    #      comment_tree =:= dict(comment_id -> [comment_id])
    #      depth =:= dict(comment_id -> int depth)
    #      num_children =:= dict(comment_id -> int num_children)
    # 2. The parent_comments_key =:= dict(comment_id -> parent_id)
    # 3. The comments_sorts keys =:= dict(comment_id36 -> float).
    #    These are represented by a Cassandra model
    #    (CommentSortsCache) rather than a permacache key. One of
    #    these exists for each sort (hot, new, etc)

    # performance hack: preload these into the LocalCache at the same
    # time
    g.permacache.get_multi(
        [comments_key(link_id), parent_comments_key(link_id)])

    cids, cid_tree, depth, num_children = link_comments(link_id)

    # load the sorter
    sorter = _get_comment_sorter(link_id, sort)

    sorter_needed = []
    if cids and not sorter:
        # total sorter cache miss: every comment needs a sort value
        sorter_needed = cids
        g.log.debug("comment_tree.py: sorter (%s) cache miss for Link %s"
                    % (sort, link_id))
        sorter = {}

    # recompute which ids are genuinely missing from the sorter (this
    # intentionally supersedes the assignment above; on a full miss it
    # yields the same list)
    sorter_needed = [x for x in cids if x not in sorter]
    if cids and sorter_needed:
        g.log.debug(
            "Error in comment_tree: sorter %r inconsistent (missing %d e.g. %r)"
            % (sort_comments_key(link_id, sort), len(sorter_needed),
               sorter_needed[:10]))
        if not g.disallow_db_writes:
            # backfill sort data for the missing comments
            update_comment_votes(
                Comment._byID(sorter_needed, data=True, return_dict=False))

        sorter.update(_comment_sorter_from_cids(sorter_needed, sort))

    # load the parents
    key = parent_comments_key(link_id)
    parents = g.permacache.get(key)
    if parents is None:
        g.log.debug("comment_tree.py: parents cache miss for Link %s"
                    % link_id)
        parents = {}
    elif cids and not all(x in parents for x in cids):
        # cached parents map is stale/incomplete; rebuild from scratch
        g.log.debug("Error in comment_tree: parents inconsistent for Link %s"
                    % link_id)
        parents = {}

    # Only rebuild parents under the lock when the link actually has
    # comments; an empty link has nothing to map.
    if not parents and len(cids) > 0:
        with g.make_lock(lock_key(link_id)):
            # reload from the cache so the sorter and parents are
            # maximally consistent
            r = g.permacache.get(comments_key(link_id))
            # NOTE(review): r is unpacked without a None check here —
            # presumably the comments_key entry must exist once cids is
            # non-empty; confirm against link_comments().
            cids, cid_tree, depth, num_children = r

            key = parent_comments_key(link_id)
            if not parents:
                parents = _parent_dict_from_tree(cid_tree)
                g.permacache.set(key, parents)

    return cids, cid_tree, depth, num_children, parents, sorter
def link_comments_and_sort(link_id, sort):
    """Load the cached comment-tree state for a link plus a sort mapping.

    Returns a 6-tuple:
        (cids, cid_tree, depth, num_children, parents, sorter)
    where `sorter` maps comment_id -> sort value for the requested `sort`,
    and `parents` maps comment_id -> parent comment_id (None for top level).
    May write to the DB (vote/sort backfill) and permacache on cache misses.
    """
    from r2.models import Comment, CommentSortsCache
    # This has grown sort of organically over time. Right now the
    # cache of the comments tree consists in three keys:
    # 1. The comments_key: A tuple of
    #      (cids, comment_tree, depth, num_children)
    #    given:
    #      cids =:= [comment_id]
    #      comment_tree =:= dict(comment_id -> [comment_id])
    #      depth =:= dict(comment_id -> int depth)
    #      num_children =:= dict(comment_id -> int num_children)
    # 2. The parent_comments_key =:= dict(comment_id -> parent_id)
    # 3. The comments_sorts keys =:= dict(comment_id36 -> float).
    #    These are represented by a Cassandra model
    #    (CommentSortsCache) rather than a permacache key. One of
    #    these exists for each sort (hot, new, etc)

    # performance hack: preload these into the LocalCache at the same
    # time
    g.permacache.get_multi(
        [comments_key(link_id), parent_comments_key(link_id)])

    cids, cid_tree, depth, num_children = link_comments(link_id)

    # load the sorter
    sorter = _get_comment_sorter(link_id, sort)

    sorter_needed = []
    if cids and not sorter:
        # total sorter cache miss: every comment needs a sort value
        sorter_needed = cids
        g.log.debug("comment_tree.py: sorter (%s) cache miss for Link %s"
                    % (sort, link_id))
        sorter = {}

    # recompute which ids are genuinely missing from the sorter
    sorter_needed = [x for x in cids if x not in sorter]
    if cids and sorter_needed:
        g.log.debug(
            "Error in comment_tree: sorter %r inconsistent (missing %d e.g. %r)"
            % (sort_comments_key(link_id, sort), len(sorter_needed),
               sorter_needed[:10]))
        if not g.disallow_db_writes:
            # backfill sort data for the missing comments
            update_comment_votes(
                Comment._byID(sorter_needed, data=True, return_dict=False))

        sorter.update(_comment_sorter_from_cids(sorter_needed, sort))

    # load the parents
    key = parent_comments_key(link_id)
    parents = g.permacache.get(key)
    if parents is None:
        g.log.debug("comment_tree.py: parents cache miss for Link %s"
                    % link_id)
        parents = {}
    elif cids and not all(x in parents for x in cids):
        # cached parents map is stale/incomplete; rebuild from scratch
        g.log.debug("Error in comment_tree: parents inconsistent for Link %s"
                    % link_id)
        parents = {}

    # FIX: guard on len(cids) so a link with no comments doesn't take the
    # lock and unpack a possibly-missing comments_key entry (r would be
    # None below and the tuple unpack would raise).
    if not parents and len(cids) > 0:
        with g.make_lock(lock_key(link_id)):
            # reload from the cache so the sorter and parents are
            # maximally consistent
            r = g.permacache.get(comments_key(link_id))
            cids, cid_tree, depth, num_children = r

            key = parent_comments_key(link_id)
            if not parents:
                parents = _parent_dict_from_tree(cid_tree)
                g.permacache.set(key, parents)

    return cids, cid_tree, depth, num_children, parents, sorter
def _get_comments(self):
    """Select which comment ids to render for this builder request.

    Walks the cached comment tree in sort order (via a candidates heap)
    until `self.num` comments are chosen or candidates run out, honoring
    `self.children` (specific children), `self.comment` (focused subtree
    with `self.context` ancestors) or the full tree. Stores results on
    self: comments, cid_tree, depth, more_recursions, offset_depth,
    dont_collapse, top_level_candidates, timer.
    """
    timer = g.stats.get_timer("CommentBuilder.get_items")
    timer.start()
    r = link_comments_and_sort(self.link, self.sort.col)
    # NOTE(review): this unpacks 5 values, but the link_comments_and_sort
    # definitions visible in this file return 6 (including num_children) —
    # verify which version is actually in scope for this builder.
    cids, cid_tree, depth, parents, sorter = r
    timer.intermediate("load_storage")

    if self.comment and not self.comment._id in depth:
        # focused comment is missing from the cached tree; fall back to
        # rendering the whole tree rather than failing
        g.log.error("Hack - self.comment (%d) not in depth. Defocusing..."
                    % self.comment._id)
        self.comment = None

    more_recursions = {}
    dont_collapse = []
    candidates = []
    offset_depth = 0

    if self.children:
        # requested specific child comments
        children = [cid for cid in self.children if cid in cids]
        self.update_candidates(candidates, sorter, children)
        dont_collapse.extend(comment for sort_val, comment in candidates)
    elif self.comment:
        # requested the tree from a specific comment

        # construct path back to top level from this comment, a maximum of
        # `context` levels
        comment = self.comment._id
        path = []
        while comment and len(path) <= self.context:
            path.append(comment)
            comment = parents[comment]

        dont_collapse.extend(path)

        # rewrite cid_tree so the parents lead only to the requested comment
        for comment in path:
            parent = parents[comment]
            cid_tree[parent] = [comment]

        # start building comment tree from earliest comment
        self.update_candidates(candidates, sorter, path[-1])

        # set offset_depth because we may not be at the top level and can
        # show deeper levels
        offset_depth = depth.get(path[-1], 0)
    else:
        # full tree requested, start with the top level comments
        top_level_comments = cid_tree.get(None, ())
        self.update_candidates(candidates, sorter, top_level_comments)

    timer.intermediate("pick_candidates")

    # choose which comments to show
    items = []
    while (self.num is None or len(items) < self.num) and candidates:
        # candidates is a heap ordered by sort value; pop the best next
        sort_val, comment_id = heapq.heappop(candidates)
        if comment_id not in cids:
            continue

        comment_depth = depth[comment_id] - offset_depth
        if comment_depth < self.max_depth:
            items.append(comment_id)

            # add children
            if comment_id in cid_tree:
                children = cid_tree[comment_id]
                self.update_candidates(candidates, sorter, children)
        elif (self.continue_this_thread and
              parents.get(comment_id) is not None):
            # the comment is too deep to add, so add a MoreRecursion for
            # its parent
            parent_id = parents[comment_id]
            if parent_id not in more_recursions:
                w = Wrapped(MoreRecursion(self.link, depth=0,
                                          parent_id=parent_id))
            else:
                w = more_recursions[parent_id]
            w.children.append(comment_id)
            more_recursions[parent_id] = w

    timer.intermediate("pick_comments")

    # leftover top-level candidates feed a root-level MoreChildren later
    self.top_level_candidates = [comment for sort_val, comment in candidates
                                 if depth.get(comment, 0) == 0]
    self.comments = Comment._byID(
        items, data=True, return_dict=False, stale=self.stale)

    timer.intermediate("lookup_comments")
    self.timer = timer
    self.cid_tree = cid_tree
    self.depth = depth
    self.more_recursions = more_recursions
    self.offset_depth = offset_depth
    self.dont_collapse = dont_collapse
def _handle_sort(msgs, chan):
    """Queue handler: refresh sort/vote data for the comments named in msgs.

    Each msg body is a comment id (as a string); ids are de-duplicated
    because the queue may deliver the same comment more than once.
    `chan` is part of the handler signature and unused here.
    """
    # FIX: removed leftover debug `print comments`, which wrote every
    # fetched comment batch to stdout.
    cids = list(set(int(msg.body) for msg in msgs))
    comments = Comment._byID(cids, data=True, return_dict=False)
    update_comment_votes(comments)
def _comment_sorter_from_cids(cids, sort):
    """Map each comment id in `cids` to its sort value for `sort`."""
    from r2.models import Comment
    fetched = Comment._byID(cids, data=False, return_dict=False)
    return {comment._id: _get_sort_value(comment, sort)
            for comment in fetched}
def message_notification_email(data):
    """Queues a system email for a new message notification.

    `data` is a batch of queued notification payloads; it is de-duplicated
    per-user, rate-limited, batched (up to MAX_MESSAGES_PER_BATCH previewed
    messages per email), rendered, and handed to g.email_provider.

    Side effects: marks each emailed inbox relation (`inbox_rel.emailed`),
    records the per-user rate limit, and emits stats/data-pipeline events.
    """
    from r2.lib.pages import MessageNotificationEmail

    timer_start = time.time()

    MAX_EMAILS_PER_USER = 30
    MAX_MESSAGES_PER_BATCH = 5
    total_messages_sent = 0
    inbox_item_lookup_count = 0

    unique_user_list = make_message_dict_unique(data)
    g.log.info("there are %s users for this batch of emails" %
               len(unique_user_list))

    for datum in unique_user_list.itervalues():
        user = Account._byID36(datum['to'], data=True)
        g.log.info('user fullname: %s' % user._fullname)

        # In case a user has enabled the preference while it was enabled for
        # them, but we've since turned it off. We need to explicitly state the
        # user because we're not in the context of an HTTP request from them.
        if not feature.is_enabled('orangereds_as_emails', user=user):
            g.log.info('feature not enabled for user: %s' % user._fullname)
            continue

        # Don't send more than MAX_EMAILS_PER_USER per user per day
        user_notification_ratelimit = SimpleRateLimit(
            name="email_message_notification_%s" % user._id36,
            seconds=int(datetime.timedelta(days=1).total_seconds()),
            limit=MAX_EMAILS_PER_USER,
        )
        if not user_notification_ratelimit.check():
            g.log.info('message blocked at user_notification_ratelimit: %s' %
                       user_notification_ratelimit)
            continue

        # Get all new messages that haven't been emailed
        inbox_items = get_unread_and_unemailed(user)
        inbox_item_lookup_count += 1
        if not inbox_items:
            g.log.info('no inbox items found for %s' % user._fullname)
            continue

        newest_inbox_rel = inbox_items[-1][0]
        oldest_inbox_rel = inbox_items[0][0]

        now = datetime.datetime.now(g.tz)
        start_date = datetime.datetime.strptime(
            datum['start_date'], "%Y-%m-%d %H:%M:%S").replace(tzinfo=g.tz)

        # If messages are still being queued within the cooling period or
        # messages have been queued past the max delay, then keep waiting
        # a little longer to batch all of the messages up
        if (start_date != newest_inbox_rel._date and
                now < newest_inbox_rel._date +
                NOTIFICATION_EMAIL_COOLING_PERIOD and
                now < oldest_inbox_rel._date + NOTIFICATION_EMAIL_MAX_DELAY):
            g.log.info('messages still being batched for: %s' %
                       user._fullname)
            continue

        messages = []
        message_count = 0
        more_unread_messages = False
        non_preview_usernames = set()

        # Batch messages to email starting with older messages
        for inbox_rel, message in inbox_items:
            # Get sender_name, replacing with display_author if it exists
            g.log.info('user fullname: %s, message fullname: %s' %
                       (user._fullname, message._fullname))
            sender_name = get_sender_name(message)

            if message_count >= MAX_MESSAGES_PER_BATCH:
                # prevent duplicate usernames for template display
                non_preview_usernames.add(sender_name)
                more_unread_messages = True
            else:
                link = None
                parent = None
                if isinstance(message, Comment):
                    permalink = message.make_permalink_slow(
                        context=1, force_domain=True)
                    if message.parent_id:
                        parent = Comment._byID(message.parent_id, data=True)
                    else:
                        link = Link._byID(message.link_id, data=True)
                else:
                    permalink = message.make_permalink(force_domain=True)

                message_type = get_message_type(message, parent, user, link)

                messages.append({
                    "author_name": sender_name,
                    "message_type": message_type,
                    "body": message.body,
                    "date": long_datetime(message._date),
                    "permalink": permalink,
                    "id": message._id,
                    "fullname": message._fullname,
                    "subject": getattr(message, 'subject', ''),
                })

            # Mark every inbox item as emailed, even the ones past the
            # preview cutoff (they are summarized, not re-sent later).
            inbox_rel.emailed = True
            inbox_rel._commit()
            message_count += 1

        mac = generate_notification_email_unsubscribe_token(
            datum['to'], user_email=user.email,
            user_password_hash=user.password)
        base = g.https_endpoint or g.origin
        unsubscribe_link = base + '/mail/unsubscribe/%s/%s' % (datum['to'],
                                                               mac)
        inbox_url = base + '/message/inbox'

        # unique email_hash for emails, to be used in utm tags
        # FIX: str() must wrap each id, not the whole generator; previously
        # this hashed the repr of a generator object instead of the ids.
        id_str = ''.join(str(message['id']) for message in messages)
        email_hash = hashlib.sha1(id_str).hexdigest()

        base_utm_query = {
            'utm_name': email_hash,
            'utm_source': 'email',
            'utm_medium': 'message_notification',
        }

        non_preview_usernames_str = generate_non_preview_usernames_str(
            non_preview_usernames)

        templateData = {
            'messages': messages,
            'unsubscribe_link': unsubscribe_link,
            'more_unread_messages': more_unread_messages,
            'message_count': message_count,
            'max_message_display_count': MAX_MESSAGES_PER_BATCH,
            'non_preview_usernames_str': non_preview_usernames_str,
            'base_url': base,
            'base_utm_query': base_utm_query,
            'inbox_url': inbox_url,
        }
        custom_headers = {'List-Unsubscribe': "<%s>" % unsubscribe_link}
        g.log.info('sending message for user: %s' % user._fullname)
        g.email_provider.send_email(
            to_address=user.email,
            from_address="Reddit <%s>" % g.notification_email,
            subject=Email.subjects[Email.Kind.MESSAGE_NOTIFICATION],
            text=MessageNotificationEmail(**templateData).render(
                style='email'),
            html=MessageNotificationEmail(**templateData).render(
                style='html'),
            custom_headers=custom_headers,
            email_type='message_notification_email',
        )
        total_messages_sent += 1

        # report the email event to data pipeline
        g.events.orangered_email_event(
            request=request,
            context=c,
            user=user,
            messages=messages,
            email_hash=email_hash,
            reply_count=message_count,
            newest_reply_age=newest_inbox_rel._date,
            oldest_reply_age=oldest_inbox_rel._date,
        )

        g.stats.simple_event('email.message_notification.queued')
        user_notification_ratelimit.record_usage()

    timer_end = time.time()
    g.log.info("Took %s seconds to send orangered emails" %
               (timer_end - timer_start))
    g.log.info("Total number of messages sent: %s" % total_messages_sent)
    g.log.info("Total count of inbox lookups: %s" % inbox_item_lookup_count)
def get_items(self):
    """Build the renderable comment tree for this builder request.

    Selects comment ids in sort order from the cached tree (honoring
    `self.children`, a focused `self.comment` with `self.context`
    ancestors, or the full tree), wraps them, nests each wrapped comment
    under its parent, and appends MoreChildren/MoreRecursion stubs for
    comments that were not selected. Returns the list of top-level
    wrapped items.
    """
    timer = g.stats.get_timer("CommentBuilder.get_items")
    timer.start()
    r = link_comments_and_sort(self.link, self.sort.col)
    # NOTE(review): this unpacks 5 values, but the link_comments_and_sort
    # definitions visible in this file return 6 (including num_children) —
    # verify which version is actually in scope for this builder.
    cids, cid_tree, depth, parents, sorter = r
    timer.intermediate("load_storage")

    if self.comment and not self.comment._id in depth:
        # focused comment is missing from the cached tree; fall back to
        # rendering the whole tree rather than failing
        g.log.error("Hack - self.comment (%d) not in depth. Defocusing..."
                    % self.comment._id)
        self.comment = None

    more_recursions = {}
    dont_collapse = []
    candidates = []
    offset_depth = 0

    if self.children:
        # requested specific child comments
        children = [child._id for child in self.children
                    if child._id in cids]
        self.update_candidates(candidates, sorter, children)
        dont_collapse.extend(comment for sort_val, comment in candidates)
    elif self.comment:
        # requested the tree from a specific comment

        # construct path back to top level from this comment, a maximum of
        # `context` levels
        comment = self.comment._id
        path = []
        while comment and len(path) <= self.context:
            path.append(comment)
            comment = parents[comment]

        dont_collapse.extend(path)

        # rewrite cid_tree so the parents lead only to the requested comment
        for comment in path:
            parent = parents[comment]
            cid_tree[parent] = [comment]

        # start building comment tree from earliest comment
        self.update_candidates(candidates, sorter, path[-1])

        # set offset_depth because we may not be at the top level and can
        # show deeper levels
        offset_depth = depth.get(path[-1], 0)
    else:
        # full tree requested, start with the top level comments
        top_level_comments = cid_tree.get(None, ())
        self.update_candidates(candidates, sorter, top_level_comments)

    timer.intermediate("pick_candidates")

    if not candidates:
        timer.stop()
        return []

    # choose which comments to show
    items = []
    while (self.num is None or len(items) < self.num) and candidates:
        # candidates is a heap ordered by sort value; pop the best next
        sort_val, comment_id = heapq.heappop(candidates)
        if comment_id not in cids:
            continue

        comment_depth = depth[comment_id] - offset_depth
        if comment_depth < self.max_depth:
            items.append(comment_id)

            # add children
            if comment_id in cid_tree:
                children = cid_tree[comment_id]
                self.update_candidates(candidates, sorter, children)
        elif (self.continue_this_thread and
              parents.get(comment_id) is not None):
            # the comment is too deep to add, so add a MoreRecursion for
            # its parent
            parent_id = parents[comment_id]
            if parent_id not in more_recursions:
                w = Wrapped(MoreRecursion(self.link, depth=0,
                                          parent_id=parent_id))
            else:
                w = more_recursions[parent_id]
            w.children.append(comment_id)
            more_recursions[parent_id] = w

    timer.intermediate("pick_comments")

    # retrieve num_children for the visible comments
    top_level_candidates = [comment for sort_val, comment in candidates
                            if depth.get(comment, 0) == 0]
    needs_num_children = items + top_level_candidates
    num_children = get_num_children(needs_num_children, cid_tree)

    timer.intermediate("calc_num_children")

    comments = Comment._byID(items, data=True, return_dict=False,
                             stale=self.stale)
    timer.intermediate("lookup_comments")
    wrapped = self.wrap_items(comments)
    timer.intermediate("wrap_comments")
    wrapped_by_id = {comment._id: comment for comment in wrapped}
    final = []

    for comment in wrapped:
        # skip deleted comments with no children
        # NOTE(review): dict.has_key is the Python 2 spelling;
        # `comment._id in cid_tree` is the modern equivalent.
        if (comment.deleted and not cid_tree.has_key(comment._id)
                and not c.user_is_admin):
            continue

        comment.num_children = num_children[comment._id]

        if comment.collapsed and comment._id in dont_collapse:
            comment.collapsed = False

        # add the comment as a child of its parent or to the top level of
        # the tree if it has no parent
        parent = wrapped_by_id.get(comment.parent_id)
        if parent:
            if not hasattr(parent, 'child'):
                parent.child = empty_listing()
                if not parent.deleted:
                    parent.child.parent_name = parent._fullname
            parent.child.things.append(comment)
        else:
            final.append(comment)

    for parent_id, more_recursion in more_recursions.iteritems():
        if parent_id not in wrapped_by_id:
            continue

        parent = wrapped_by_id[parent_id]
        parent.child = empty_listing(more_recursion)
        if not parent.deleted:
            parent.child.parent_name = parent._fullname

    timer.intermediate("build_comments")

    if not self.load_more:
        timer.stop()
        return final

    # build MoreChildren for visible comments
    visible_comments = wrapped_by_id.keys()
    for visible_id in visible_comments:
        if visible_id in more_recursions:
            # don't add a MoreChildren if we already have a MoreRecursion
            continue

        children = cid_tree.get(visible_id, ())
        missing_children = [child for child in children
                            if child not in visible_comments]

        if missing_children:
            visible_children = (child for child in children
                                if child in visible_comments)
            visible_count = sum(1 + num_children[child]
                                for child in visible_children)
            missing_count = num_children[visible_id] - visible_count
            missing_depth = depth.get(visible_id, 0) + 1 - offset_depth

            if missing_depth < self.max_depth:
                mc = MoreChildren(self.link, depth=missing_depth,
                                  parent_id=visible_id)
                mc.children.extend(missing_children)
                w = Wrapped(mc)
                w.count = missing_count
            else:
                mr = MoreRecursion(self.link, depth=missing_depth,
                                   parent_id=visible_id)
                w = Wrapped(mr)

            # attach the MoreChildren
            parent = wrapped_by_id[visible_id]
            if hasattr(parent, 'child'):
                parent.child.things.append(w)
            else:
                parent.child = empty_listing(w)
                if not parent.deleted:
                    parent.child.parent_name = parent._fullname

    # build MoreChildren for missing root level comments
    if top_level_candidates:
        mc = MoreChildren(self.link, depth=0, parent_id=None)
        mc.children.extend(top_level_candidates)
        w = Wrapped(mc)
        w.count = sum(1 + num_children[comment]
                      for comment in top_level_candidates)
        final.append(w)

    if isinstance(self.sort, operators.shuffled):
        shuffle(final)

    timer.intermediate("build_morechildren")
    timer.stop()
    return final