def add_comments(comments):
    """Add comments to the CommentTree and update scores.

    Comments are grouped by ``link_id`` and each link is handled in turn:
    the per-sort scores are written, the comments are added to the link's
    CommentTree, and the comment orders are rewritten while the tree's
    mutation context is still held.

    Args:
        comments: a single comment or an iterable of comments (normalised
            with ``tup``).

    An InconsistentCommentTreeError for one link is logged and counted, and
    processing then moves on to the next link.
    """
    from r2.models.builder import write_comment_orders

    comments = tup(comments)
    link_ids = [comment.link_id for comment in comments]
    links = Link._byID(link_ids, data=True)

    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        # write scores before CommentTree because the scores must exist for
        # all comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')
                comment_tree.add_comments(link_comments)
                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # failed to add a comment to the CommentTree because its
                # parent is missing from the tree. this comment will be lost
                # forever unless a rebuild is performed.
                comment_ids = [comment._id for comment in link_comments]
                g.log.error(
                    "comment_tree_inconsistent: %s %s" % (link, comment_ids))
                g.stats.simple_event('comment_tree_inconsistent')
                # Give up on this link only. A ``return`` here would also
                # drop the comments of every link not yet processed in this
                # batch. As before, the timer is not stopped on this path.
                continue

            # do this under the same lock because we want to ensure we are
            # using the same version of the CommentTree as was just written
            write_comment_orders(link)
            timer.intermediate('write_order')

        timer.stop()
def add_comments(comments):
    """Insert comments into their links' CommentTrees and store their scores.

    Accepts one comment or several (normalised through ``tup``), buckets
    them by ``link_id`` and, for every link, writes the per-sort scores,
    adds the comments to the tree and rewrites the comment orders.
    """
    from r2.models.builder import write_comment_orders

    comments = tup(comments)
    links_by_id = Link._byID([c.link_id for c in comments], data=True)

    grouped = defaultdict(list)
    for c in comments:
        grouped[c.link_id].append(c)

    for link_id, batch in grouped.iteritems():
        link = links_by_id[link_id]

        timer = g.stats.get_timer("comment_tree.add.1")
        timer.start()

        # Scores are stored ahead of the tree update because they must exist
        # for every comment that is in the tree.
        for sort in ("_controversy", "_confidence", "_score"):
            sort_scores = {}
            for c in batch:
                sort_scores[c._id36] = getattr(c, sort)
            CommentScoresByLink.set_scores(link, sort, sort_scores)

        qa_scores = _get_qa_comment_scores(link, batch)
        CommentScoresByLink.set_scores(link, "_qa", qa_scores)
        timer.intermediate("scores")

        CommentTree.add_comments(link, batch)
        timer.intermediate("update")

        write_comment_orders(link)
        timer.intermediate("write_order")

        timer.stop()
def add_comments(comments):
    """Insert comments into their links' CommentTrees and store their scores.

    Accepts one comment or several (normalised through ``tup``). Comments
    are bucketed by ``link_id``; for each link the scores are written with
    ``write_comment_scores``, the comments are added to the tree, and the
    comment orders are rewritten. Each phase is reported to a stats timer.
    """
    from r2.models.builder import write_comment_orders

    comments = tup(comments)
    links = Link._byID([c.link_id for c in comments])

    grouped = defaultdict(list)
    for c in comments:
        grouped[c.link_id].append(c)

    for link_id, batch in grouped.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer('comment_tree.add.1')
        timer.start()

        write_comment_scores(link, batch)
        timer.intermediate('scores')

        CommentTree.add_comments(link, batch)
        timer.intermediate('update')

        write_comment_orders(link)
        timer.intermediate('write_order')

        timer.stop()
def update_score(obj, up_change, down_change, new_valid_thing, old_valid_thing):
    """Apply a vote delta to ``obj`` and propagate it as descendant karma.

    ``_ups`` and ``_downs`` on ``obj`` are incremented by the given amounts.
    When ``obj`` is a Comment, the net change (ups minus downs) is also
    added to the parent comment's descendant karma (if the comment has a
    ``parent_id`` attribute) and to the link's ``_descendant_karma``.

    ``new_valid_thing`` and ``old_valid_thing`` are accepted but not used
    by this function.
    """
    obj._incr('_ups', up_change)
    obj._incr('_downs', down_change)

    if not isinstance(obj, Comment):
        return

    net_change = up_change - down_change

    if hasattr(obj, 'parent_id'):
        parent = Comment._byID(obj.parent_id)
        parent.incr_descendant_karma([], net_change)

    # NOTE(review): the original indentation was lost; the link update is
    # taken to apply to every comment, not only to replies -- confirm.
    link = Link._byID(obj.link_id)
    link._incr('_descendant_karma', net_change)
def update_comment_votes(comments, write_consistency_level=None):
    """Recompute the sort values of ``comments`` and store them.

    Comments (one or many, normalised through ``tup``) are bucketed by
    ``link_id``. For every link and every supported sort, the values are
    computed by ``_comment_sorter_from_cids`` against the link's comment
    tree and written to CommentSortsCache.

    Args:
        comments: a single comment or an iterable of comments.
        write_consistency_level: forwarded unchanged to
            ``CommentSortsCache._set_values``.
    """
    from r2.models import CommentSortsCache

    by_link = {}
    for comment in tup(comments):
        by_link.setdefault(comment.link_id, []).append(comment)

    links = Link._byID(by_link.keys(), data=True)
    trees = dict(
        (link._id, get_comment_tree(link)) for link in links.values())

    sorts = ("_controversy", "_hot", "_confidence", "_score", "_date", "_qa")
    for link_id, link_comments in by_link.iteritems():
        link = links[link_id]
        for sort in sorts:
            cid_tree = trees[link_id].tree
            # Cassandra always uses the id36 instead of the integer ID, so
            # the values are requested keyed by id36 (by_36=True).
            values = _comment_sorter_from_cids(
                link_comments, sort, link, cid_tree, by_36=True)

            CommentSortsCache._set_values(
                sort_comments_key(link_id, sort),
                values,
                write_consistency_level=write_consistency_level)
def add_comments(comments):
    """Apply new and deleted comments to their links' comment trees.

    Comments (one or many, normalised through ``tup``) are grouped by
    ``link_id``. For each link, non-deleted comments are added to the tree
    and deleted ones are removed from it inside the tree's mutation
    context. If that fails, the error is logged and the tree is recomputed
    with ``get_comment_tree(..., _update=True)``. Finally the comments'
    votes are refreshed via ``update_comment_votes``.

    Args:
        comments: a single comment or an iterable of comments.
    """
    comments = tup(comments)
    links = Link._byID([com.link_id for com in comments], data=True)

    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    for link_id, coms in link_map.iteritems():
        link = links[link_id]
        # Named so that they do not shadow this function inside its own body.
        live_comments = [comment for comment in coms if not comment._deleted]
        deleted_comments = [comment for comment in coms if comment._deleted]

        timer = g.stats.get_timer(
            "comment_tree.add.%s" % link.comment_tree_version)
        timer.start()
        try:
            with CommentTree.mutation_context(link):
                timer.intermediate("lock")
                cache = get_comment_tree(link, timer=timer)
                timer.intermediate("get")
                if live_comments:
                    cache.add_comments(live_comments)
                for comment in deleted_comments:
                    cache.delete_comment(comment, link)
                timer.intermediate("update")
        except Exception:
            # Still a catch-all for ordinary errors, but unlike a bare
            # ``except:`` it lets KeyboardInterrupt and SystemExit propagate
            # instead of triggering a tree rebuild.
            g.log.exception(
                "add_comments_nolock failed for link %s, recomputing tree",
                link_id)
            # calculate it from scratch
            get_comment_tree(link, _update=True, timer=timer)
        timer.stop()
        update_comment_votes(coms)
def add_comments(comments):
    """Apply new and deleted comments to their links' comment trees.

    Comments (one or many, normalised through ``tup``) are bucketed by
    ``link_id``. Within each link's mutation context, non-deleted comments
    are added to the tree and deleted ones are removed. On
    InconsistentCommentTreeError the failure is logged and the tree is
    rebuilt with ``rebuild_comment_tree``. Votes for the link's comments
    are then refreshed through ``update_comment_votes``.
    """
    comments = tup(comments)
    links_by_id = Link._byID([c.link_id for c in comments], data=True)

    by_link = {}
    for c in comments:
        by_link.setdefault(c.link_id, []).append(c)

    for link_id, batch in by_link.iteritems():
        link = links_by_id[link_id]

        to_add = [c for c in batch if not c._deleted]
        # Kept lazy: consumed exactly once, inside the mutation context.
        to_delete = (c for c in batch if c._deleted)

        timer_name = 'comment_tree.add.%s' % link.comment_tree_version
        timer = g.stats.get_timer(timer_name)
        timer.start()

        try:
            with CommentTree.mutation_context(link, timeout=30):
                timer.intermediate('lock')

                tree = get_comment_tree(link, timer=timer)
                timer.intermediate('get')

                if to_add:
                    tree.add_comments(to_add)
                for c in to_delete:
                    tree.delete_comment(c, link)
                timer.intermediate('update')
        except InconsistentCommentTreeError:
            failed_ids = [c._id for c in batch]
            g.log.exception(
                'add_comments_nolock failed for link %s %s, recomputing',
                link_id, failed_ids)
            rebuild_comment_tree(link, timer=timer)

        timer.stop()
        update_comment_votes(batch)
def add_comments(comments):
    """Add comments to their links' comment trees and refresh their votes.

    Comments (one or many, normalised through ``tup``) are grouped by
    ``link_id`` and added to each link's tree inside the tree's mutation
    context. If that fails, the error is logged and the tree is recomputed
    with ``get_comment_tree(..., _update=True)``. ``update_comment_votes``
    is then called for the link's comments.

    Args:
        comments: a single comment or an iterable of comments.
    """
    comments = tup(comments)
    links = Link._byID([com.link_id for com in comments], data=True)

    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    for link_id, coms in link_map.iteritems():
        link = links[link_id]
        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()
        try:
            with CommentTree.mutation_context(link):
                timer.intermediate('lock')
                cache = get_comment_tree(link, timer=timer)
                timer.intermediate('get')
                cache.add_comments(coms)
                timer.intermediate('update')
        except Exception:
            # Still a catch-all for ordinary errors, but unlike a bare
            # ``except:`` it lets KeyboardInterrupt and SystemExit propagate
            # instead of triggering a tree rebuild.
            g.log.exception(
                'add_comments_nolock failed for link %s, recomputing tree',
                link_id)
            # calculate it from scratch
            get_comment_tree(link, _update=True, timer=timer)
        timer.stop()
        update_comment_votes(coms)
def link_comments(link_id, _update=False):
    """Return the cached comment-tree data for a link, rebuilding on demand.

    Args:
        link_id: id of the link whose comment data is wanted.
        _update: when true, ignore any cached value and rebuild.

    Returns:
        The cached value when one exists and ``_update`` is false.
        Otherwise the result of ``_load_link_comments`` minus its last
        element (``num_comments``), i.e. ``(cids, cid_tree, depth,
        num_children)``.

    A rebuild also rewrites the cached parent dict, stores ``num_comments``
    on the link and passes the link to ``changed``.
    """
    cache_key = comments_key(link_id)

    cached = g.permacache.get(cache_key)
    if cached and not _update:
        return cached

    # This operation can take longer than most (note the inner locks);
    # better to time out the request temporarily than to deal with an
    # inconsistent tree.
    with g.make_lock(lock_key(link_id), timeout=180):
        loaded = _load_link_comments(link_id)
        cids, cid_tree, depth, num_children, num_comments = loaded
        # num_comments is not cached; it is written to the link below.
        tree_data = loaded[:-1]

        # rebuild parent dict
        g.permacache.set(parent_comments_key(link_id),
                         _parent_dict_from_tree(cid_tree))
        g.permacache.set(cache_key, tree_data)

        # update the link's comment count and schedule it for search
        # reindexing
        link = Link._byID(link_id, data=True)
        link.num_comments = num_comments
        link._commit()
        from r2.lib.db.queries import changed
        changed(link)

    return tree_data
def delete_comment(comment):
    """Delete ``comment`` from its link's comment tree.

    The link is loaded, the tree is fetched inside the link's mutation
    context, the comment is deleted from it, and the link is then passed
    to ``changed``. Each phase is reported to a stats timer.
    """
    link = Link._byID(comment.link_id, data=True)

    timer_name = 'comment_tree.delete.%s' % link.comment_tree_version
    timer = g.stats.get_timer(timer_name)
    timer.start()

    with CommentTree.mutation_context(link):
        timer.intermediate('lock')

        tree = get_comment_tree(link)
        timer.intermediate('get')

        tree.delete_comment(comment, link)
        timer.intermediate('update')

        from r2.lib.db.queries import changed
        changed([link])
        timer.intermediate('changed')

    timer.stop()
def delete_comment(comment):
    """Drop ``comment`` from the cached comment data of its link.

    While holding the link's lock, the cached ``(cids, comment_tree, depth,
    num_children)`` tuple is loaded. A comment that is not a key of
    ``comment_tree`` (i.e. has no children) is removed from ``cids``,
    ``depth`` and ``num_children`` and the tuple is written back. In every
    case the link's ``num_comments`` is decremented by one and the link is
    passed to ``changed``.
    """
    link_id = comment.link_id
    comment_id = comment._id

    with g.make_lock(lock_key(link_id)):
        cids, comment_tree, depth, num_children = link_comments(link_id)

        # only completely remove comments with no children
        has_children = comment_id in comment_tree
        if not has_children:
            if comment_id in cids:
                cids.remove(comment_id)
            for mapping in (depth, num_children):
                if comment_id in mapping:
                    del mapping[comment_id]
            g.permacache.set(comments_key(link_id),
                             (cids, comment_tree, depth, num_children))

        # update the link's comment count and schedule it for search
        # reindexing
        link = Link._byID(link_id, data=True)
        link._incr('num_comments', -1)
        from r2.lib.db.queries import changed
        changed(link)
def update_comment_votes(comments):
    """Recompute the per-sort scores of ``comments`` and store them.

    Comments (one or many, normalised through ``tup``) are bucketed by
    ``link_id``. For every link and every supported sort, scores are
    computed by ``_comment_sorter_from_cids`` against the link's comment
    tree (keyed by id36) and saved with ``CommentScoresByLink.set_scores``.
    """
    from r2.models import CommentScoresByLink

    by_link = {}
    for comment in tup(comments):
        by_link.setdefault(comment.link_id, []).append(comment)

    links = Link._byID(by_link.keys(), data=True)

    trees = {}
    for link in links.values():
        trees[link._id] = get_comment_tree(link)

    sorts = ("_controversy", "_hot", "_confidence", "_score", "_date", "_qa")
    for link_id, link_comments in by_link.iteritems():
        link = links[link_id]
        for sort in sorts:
            cid_tree = trees[link_id].tree
            scores = _comment_sorter_from_cids(
                link_comments, sort, link, cid_tree, by_36=True)
            CommentScoresByLink.set_scores(link, sort, scores)
def update_comment_votes(comments):
    """Store the current per-sort scores of ``comments``.

    Comments (one or many, normalised through ``tup``) are bucketed by
    ``link_id``. For each link, the ``_controversy``, ``_confidence`` and
    ``_score`` attributes of its comments are written keyed by id36, then
    the ``_qa`` scores produced by ``_get_qa_comment_scores``.
    """
    from r2.models import CommentScoresByLink

    grouped = defaultdict(list)
    for c in tup(comments):
        grouped[c.link_id].append(c)

    links = Link._byID(grouped.keys(), data=True)

    for link_id, batch in grouped.iteritems():
        link = links[link_id]

        for sort in ("_controversy", "_confidence", "_score"):
            sort_scores = {}
            for c in batch:
                sort_scores[c._id36] = getattr(c, sort)
            CommentScoresByLink.set_scores(link, sort, sort_scores)

        qa_scores = _get_qa_comment_scores(link, batch)
        CommentScoresByLink.set_scores(link, "_qa", qa_scores)
def add_comments(comments):
    """Insert comments into their links' CommentTrees and store their scores.

    Accepts one comment or several (normalised through ``tup``), buckets
    them by ``link_id`` and, for every link, writes the per-sort scores,
    adds the comments to the tree and rewrites the comment orders. Each
    phase is reported to a stats timer.
    """
    from r2.models.builder import write_comment_orders

    comments = tup(comments)
    links_by_id = Link._byID([c.link_id for c in comments], data=True)

    grouped = defaultdict(list)
    for c in comments:
        grouped[c.link_id].append(c)

    for link_id, batch in grouped.iteritems():
        link = links_by_id[link_id]

        timer = g.stats.get_timer('comment_tree.add.1')
        timer.start()

        # Scores are stored ahead of the tree update because they must exist
        # for every comment that is in the tree.
        for sort in ("_controversy", "_confidence", "_score"):
            sort_scores = dict((c._id36, getattr(c, sort)) for c in batch)
            CommentScoresByLink.set_scores(link, sort, sort_scores)

        qa_scores = _get_qa_comment_scores(link, batch)
        CommentScoresByLink.set_scores(link, "_qa", qa_scores)
        timer.intermediate('scores')

        CommentTree.add_comments(link, batch)
        timer.intermediate('update')

        write_comment_orders(link)
        timer.intermediate('write_order')

        timer.stop()
def add_comments(comments):
    """Apply new and deleted comments to CommentTrees and store scores.

    Comments (one or many, normalised through ``tup``) are bucketed by
    ``link_id``. For each link the per-sort scores are written, then,
    inside the tree's mutation context, non-deleted comments are added to
    the tree and deleted ones removed. On InconsistentCommentTreeError the
    tree is rebuilt and the link's search index is updated. The comment
    orders are rewritten before the mutation context is left.
    """
    from r2.models.builder import write_comment_orders

    comments = tup(comments)
    links_by_id = Link._byID([c.link_id for c in comments], data=True)

    grouped = defaultdict(list)
    for c in comments:
        grouped[c.link_id].append(c)

    for link_id, batch in grouped.iteritems():
        link = links_by_id[link_id]

        to_add = []
        to_delete = []
        for c in batch:
            if c._deleted:
                to_delete.append(c)
            else:
                to_add.append(c)

        timer_name = 'comment_tree.add.%s' % link.comment_tree_version
        timer = g.stats.get_timer(timer_name)
        timer.start()

        # Scores are stored ahead of the tree update because they must exist
        # for every comment that is in the tree.
        for sort in ("_controversy", "_confidence", "_score"):
            sort_scores = dict((c._id36, getattr(c, sort)) for c in batch)
            CommentScoresByLink.set_scores(link, sort, sort_scores)

        qa_scores = _get_qa_comment_scores(link, batch)
        CommentScoresByLink.set_scores(link, "_qa", qa_scores)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')

                tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')

                if to_add:
                    tree.add_comments(to_add)
                for c in to_delete:
                    tree.delete_comment(c, link)
                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # Raised when a comment is added whose parent is not in the
                # tree yet; the tree has to be rebuilt from scratch.
                failed_ids = [c._id for c in batch]
                g.log.exception(
                    'add_comments_nolock failed for link %s %s, recomputing',
                    link_id, failed_ids)

                CommentTree.rebuild(link)
                timer.intermediate('rebuild')

                # the tree rebuild updated the link's comment count, so
                # schedule it for search reindexing
                link.update_search_index()
                timer.intermediate('update_search_index')
                g.stats.simple_event('comment_tree_inconsistent')

            # Done under the same lock so the orders are derived from the
            # same version of the CommentTree that was just written.
            write_comment_orders(link, timer)
            timer.intermediate('write_order')

        timer.stop()
def add_comments(comments):
    """Apply new and deleted comments to CommentTrees and store scores.

    Comments (one or many, normalised through ``tup``) are bucketed by
    ``link_id``. For each link the per-sort scores are written, then,
    inside the tree's mutation context, non-deleted comments are added to
    the tree and deleted ones removed. On InconsistentCommentTreeError the
    tree is rebuilt and the link's search index is updated. The comment
    orders are rewritten before the mutation context is left.
    """
    from r2.models.builder import write_comment_orders

    comments = tup(comments)
    links_by_id = Link._byID([c.link_id for c in comments], data=True)

    grouped = defaultdict(list)
    for c in comments:
        grouped[c.link_id].append(c)

    for link_id, batch in grouped.iteritems():
        link = links_by_id[link_id]

        to_add = []
        to_delete = []
        for c in batch:
            if c._deleted:
                to_delete.append(c)
            else:
                to_add.append(c)

        timer_name = 'comment_tree.add.%s' % link.comment_tree_version
        timer = g.stats.get_timer(timer_name)
        timer.start()

        # Scores are stored ahead of the tree update because they must exist
        # for every comment that is in the tree.
        for sort in ("_controversy", "_confidence", "_score"):
            sort_scores = dict((c._id36, getattr(c, sort)) for c in batch)
            CommentScoresByLink.set_scores(link, sort, sort_scores)

        qa_scores = _get_qa_comment_scores(link, batch)
        CommentScoresByLink.set_scores(link, "_qa", qa_scores)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')

                tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')

                if to_add:
                    tree.add_comments(to_add)
                for c in to_delete:
                    tree.delete_comment(c, link)
                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # Raised when a comment is added whose parent is not in the
                # tree yet; the tree has to be rebuilt from scratch.
                failed_ids = [c._id for c in batch]
                g.log.exception(
                    'add_comments_nolock failed for link %s %s, recomputing',
                    link_id, failed_ids)

                CommentTree.rebuild(link)
                timer.intermediate('rebuild')

                # the tree rebuild updated the link's comment count, so
                # schedule it for search reindexing
                link.update_search_index()
                timer.intermediate('update_search_index')
                g.stats.simple_event('comment_tree_inconsistent')

            # Done under the same lock so the orders are derived from the
            # same version of the CommentTree that was just written.
            write_comment_orders(link)
            timer.intermediate('write_order')

        timer.stop()
def add_comments_nolock(link_id, comments): cids, comment_tree, depth, num_children = link_comments(link_id) #dfs to find the list of parents for the new comment def find_parents(): stack = [cid for cid in comment_tree[None]] parents = [] while stack: cur_cm = stack.pop() if cur_cm == cm_id: return parents elif cur_cm in comment_tree: #make cur_cm the end of the parents list parents = parents[:depth[cur_cm]] + [cur_cm] for child in comment_tree[cur_cm]: stack.append(child) new_parents = {} for comment in comments: cm_id = comment._id p_id = comment.parent_id #make sure we haven't already done this before (which would happen #if the tree isn't cached when you add a comment) if comment._id in cids: continue #add to comment list cids.append(comment._id) #add to tree comment_tree.setdefault(p_id, []).append(cm_id) #add to depth depth[cm_id] = depth[p_id] + 1 if p_id else 0 #update children num_children[cm_id] = 0 #if this comment had a parent, find the parent's parents if p_id: new_parents[cm_id] = p_id for p_id in find_parents(): num_children[p_id] += 1 # update our cache of children -> parents as well: key = parent_comments_key(link_id) r = g.permacache.get(key) if not r: r = _parent_dict_from_tree(comment_tree) for cm_id, parent_id in new_parents.iteritems(): # print "Now, I set %s -> %s" % (cm_id, parent_id) r[cm_id] = parent_id for comment in comments: cm_id = comment._id if cm_id not in new_parents: r[cm_id] = None # print "And I set %s -> None" % cm_id # update the link's comment count and schedule it for search reindexing link = Link._byID(link_id, data = True) link._incr('num_comments', len(comments)) from r2.lib.db.queries import changed changed(link) g.permacache.set(key, r) g.permacache.set(comments_key(link_id), (cids, comment_tree, depth, num_children))