def _calculate_qa_comment_scores(link, cid_tree, comments):
    """Return a dict of comment_id36 -> qa score.

    Arguments:

    * link -- the Link the comments belong to; its responder_ids are read.
    * cid_tree -- a mapping from parent comment id to children ids.
    * comments -- an iterable of Comments to score. It is not modified.

    Parents of comments written by a responder are scored as well, so the
    result can contain keys for comments that were not passed in.

    """
    # Responder is usually the OP, but there could be support for adding
    # other answerers in the future.
    responder_ids = link.responder_ids

    # Score a private list: parents are appended below and the caller's
    # sequence must not grow as a side effect. Copying also lets callers
    # pass any iterable rather than only a list.
    to_score = list(comments)

    # An OP response will change the sort value for its parent, so we need
    # to process the parent, too.
    parent_cids = [
        comment.parent_id
        for comment in to_score
        if comment.author_id in responder_ids and comment.parent_id
    ]
    to_score.extend(Comment._byID(parent_cids, return_dict=False))

    # Fetch the comments in batch to avoid a bunch of separate calls down
    # the line.
    all_child_cids = []
    for comment in to_score:
        child_cids = cid_tree.get(comment._id, None)
        if child_cids:
            all_child_cids.extend(child_cids)
    all_child_comments = Comment._byID(all_child_cids)

    comment_sorter = {}
    for comment in to_score:
        child_cids = cid_tree.get(comment._id, ())
        child_comments = (all_child_comments[cid] for cid in child_cids)
        comment_sorter[comment._id36] = comment._qa(child_comments,
                                                    responder_ids)

    return comment_sorter
def run():
    """Backfill descendant karma for every link and its comments.

    Walks the Link id space in windows of 100 ids. For each link in a
    window, every comment's net score (ups minus downs) is pushed onto its
    parent comment, when it has one, and summed into a per-link total that
    is finally added to the link's '_descendant_karma'.
    """
    window = 100
    kind = Link
    highest_id = max_thing_id(kind)

    for window_start in xrange(0, highest_id + 1, window):
        window_end = window_start + window
        print("Add desc karma for links %s to %s" % (window_start, window_end))

        for link in list(query_thing_id_range(kind, window_start, window_end)):
            if not link._loaded:
                link._load()

            link_comments = list(Comment._query(Comment.c.link_id == link._id,
                                                eager_load=True))
            link_total = 0
            for comment in link_comments:
                if not comment._loaded:
                    comment._load()

                # Only replies have a parent to credit.
                if hasattr(comment, 'parent_id') and comment.parent_id:
                    parent = Comment._byID(comment.parent_id)
                    parent.incr_descendant_karma(
                        [], comment._ups - comment._downs)

                # Every comment counts towards the link, reply or not.
                link_total += (comment._ups - comment._downs)

            link._incr('_descendant_karma', link_total)
def update_score(obj, up_change, down_change, new_valid_thing, old_valid_thing):
    """Apply a vote delta to obj and propagate it as descendant karma.

    Arguments:

    * obj -- the thing whose '_ups' and '_downs' counters are incremented.
    * up_change -- amount added to '_ups'.
    * down_change -- amount added to '_downs'.
    * new_valid_thing, old_valid_thing -- accepted to keep the call
      signature; not read here.

    When obj is a Comment, the net change (up_change - down_change) is also
    added to its parent comment, if it has one, and to the
    '_descendant_karma' of the Link it belongs to.
    """
    obj._incr('_ups', up_change)
    obj._incr('_downs', down_change)

    if not isinstance(obj, Comment):
        return

    net_change = up_change - down_change

    # A comment can carry a parent_id attribute that is empty. Treat a
    # falsy parent_id as "no parent", exactly like the descendant-karma
    # backfill does, instead of handing it to Comment._byID.
    if hasattr(obj, 'parent_id') and obj.parent_id:
        Comment._byID(obj.parent_id).incr_descendant_karma([], net_change)

    Link._byID(obj.link_id)._incr('_descendant_karma', net_change)
def run():
    """Recompute descendant karma for all links, one id range at a time.

    Links are fetched in ranges of 100 ids. For each link its comments are
    queried; each comment's (ups - downs) is added to its parent comment
    when a parent exists, and the sum over all comments is added to the
    link's '_descendant_karma'.
    """
    step = 100
    thing_cls = Link
    last_id = max_thing_id(thing_cls)
    first_id = 0

    for low in xrange(first_id, last_id + 1, step):
        high = low + step
        print("Add desc karma for links %s to %s" % (low, high))

        batch = list(query_thing_id_range(thing_cls, low, high))
        for link in batch:
            if not link._loaded:
                link._load()

            query = Comment._query(Comment.c.link_id == link._id,
                                   eager_load=True)
            karma_for_link = 0
            for reply in list(query):
                if not reply._loaded:
                    reply._load()

                has_parent = hasattr(reply, 'parent_id') and reply.parent_id
                if has_parent:
                    Comment._byID(reply.parent_id).incr_descendant_karma(
                        [], reply._ups - reply._downs)

                karma_for_link += (reply._ups - reply._downs)

            link._incr('_descendant_karma', karma_for_link)
def _comment_sorter_from_cids(comments, sort, link, cid_tree, by_36=False):
    """Retrieve sort values for comments.

    Useful to fill in any gaps in CommentSortsCache.

    Arguments:

    * comments -- an iterable of Comments to retrieve sort values for. It
      is not modified.
    * sort -- a string representing the type of sort to use.
    * link -- the Link the comments belong to; only read for the '_qa'
      sort.
    * cid_tree -- a mapping from parent id to children ids, as created by
      CommentTree.
    * by_36 -- a boolean indicating if the resultant map keys off of base
      36 ids instead of integer ids.

    Returns a dictionary from cid to a sort value. For the '_qa' sort the
    parents of responder comments are included as well.

    """
    is_qa = (sort == '_qa')

    # Work on a private list: the Q&A branch appends parent comments, and
    # the caller's sequence must not grow as a side effect. Copying also
    # honours the "any iterable" contract above.
    to_score = list(comments)

    # The Q&A sort requires extra information about surrounding comments.
    # It's more efficient to gather it up here instead of in the guts of
    # the comment sort, but we don't want to do that for sort types that
    # don't need it.
    if is_qa:
        # An OP response will change the sort value for its parent, so we
        # need to process the parent, too.
        responder_ids = link.responder_ids
        parent_cids = [
            c.parent_id
            for c in to_score
            if c.author_id in responder_ids and c.parent_id
        ]
        to_score.extend(
            Comment._byID(parent_cids, data=True, return_dict=False))

        # Fetch the comments in batch to avoid a bunch of separate calls
        # down the line.
        all_child_cids = []
        for c in to_score:
            child_cids = cid_tree.get(c._id, None)
            if child_cids:
                all_child_cids.extend(child_cids)
        all_child_comments = Comment._byID(all_child_cids, data=True)

    comment_sorter = {}
    for comment in to_score:
        if is_qa:
            child_cids = cid_tree.get(comment._id, ())
            child_comments = (all_child_comments[cid] for cid in child_cids)
            sort_value = _get_sort_value(comment, sort, link, child_comments)
        else:
            sort_value = _get_sort_value(comment, sort)

        key = comment._id36 if by_36 else comment._id
        comment_sorter[key] = sort_value

    return comment_sorter
def get_comment_scores(link, sort, comment_ids, timer):
    """Retrieve sort values for the given comments on a post.

    Arguments:

    * link -- the Link containing the comments.
    * sort -- a string indicating the attribute on the comments to use for
      generating sort values.
    * comment_ids -- the integer ids of the comments that need a score.
    * timer -- a timer whose intermediate('sort') is called before
      returning.

    Returns a dictionary from integer cid to a sort value.

    """
    from r2.lib.db import queries
    from r2.models import CommentScoresByLink

    if not comment_ids:
        # no comments means no scores
        return {}

    if sort != "_date":
        stored = CommentScoresByLink.get_scores(link, sort)

        # we store these id36ed, but there are still bits of the code that
        # want to deal in integer IDs
        scores_by_id = {}
        for id36, score in stored.iteritems():
            scores_by_id[int(id36, 36)] = score
    else:
        # comment ids are monotonically increasing, so we can use them as a
        # substitute for creation date
        scores_by_id = dict((cid, cid) for cid in comment_ids)

    missing_ids = set(comment_ids).difference(scores_by_id)
    if missing_ids:
        # some scores were missing from CommentScoresByLink--lookup the
        # comments and calculate the scores.
        g.stats.simple_event("comment_tree_bad_sorter")

        missing_comments = Comment._byID(missing_ids, data=True,
                                         return_dict=False)

        if sort == "_qa":
            missing_by_id36 = _get_qa_comment_scores(link, missing_comments)
        else:
            missing_by_id36 = {}
            for comment in missing_comments:
                missing_by_id36[comment._id36] = getattr(comment, sort)

        missing_by_id = dict(
            (int(id36, 36), score)
            for id36, score in missing_by_id36.iteritems()
        )

        # up to once per minute write the scores to limit writes but
        # eventually return us to the correct state.
        if not g.disallow_db_writes:
            write_key = "lock:score_{link}{sort}".format(link=link._id36,
                                                         sort=sort)
            if g.lock_cache.add(write_key, "", time=60):
                CommentScoresByLink.set_scores(link, sort, missing_by_id36)

        scores_by_id.update(missing_by_id)

    timer.intermediate("sort")

    return scores_by_id
def upgrade(cls, tree, link):
    """Rebuild the 'parents' path of every comment in tree, then re-add them.

    All comment ids listed as children in tree.tree are loaded in batches
    of 100. The tree is then walked from the root (parent id None): each
    child's 'parents' attribute is set to its parent's 'parents' plus ':'
    plus the parent's id36 ('' for top-level comments). Comments whose
    value changed are committed, and finally every loaded comment is passed
    to cls.add_comments.
    """
    batch = 100

    child_ids = []
    for _parent, kids in tree.tree.iteritems():
        child_ids.extend(kids)

    by_id = {}
    for start in xrange(0, len(child_ids), batch):
        stop = start + batch
        g.log.debug(' loading comments %d..%d', start, stop)
        by_id.update(Comment._byID(child_ids[start:stop], data=True))

    # need to fill in parents attr for each comment
    changed = []
    pending = [None]
    while pending:
        parent_id = pending.pop()
        if parent_id is not None:
            parent = by_id[parent_id]
            path = parent.parents + ':' + parent._id36
        else:
            path = ''

        kids = tree.tree.get(parent_id, [])
        pending.extend(kids)
        for kid_id in kids:
            kid = by_id[kid_id]
            # A child is always fixed up before it is popped as a parent,
            # so its own path is final by the time its children read it.
            if kid.parents != path:
                kid.parents = path
                changed.append(kid)

    for comment in changed:
        comment._commit()

    cls.add_comments(tree, by_id.values())
def upgrade(cls, tree, link):
    """Load every comment referenced by tree and re-add it via add_comments.

    Child ids are collected from tree.tree, the comments are fetched in
    batches of 100, and the loaded comments are passed to
    cls.add_comments(tree, ...).
    """
    batch = 100

    child_ids = []
    for _parent, kids in tree.tree.iteritems():
        child_ids.extend(kids)

    loaded = {}
    for start in xrange(0, len(child_ids), batch):
        stop = start + batch
        g.log.debug(' loading comments %d..%d', start, stop)
        loaded.update(Comment._byID(child_ids[start:stop], data=True))

    cls.add_comments(tree, loaded.values())
def _comment_sorter_from_cids(cids, sort):
    """Retrieve sort values for comments.

    Useful to fill in any gaps in CommentSortsCache.

    Arguments:

    * cids -- the ids of the Comments to retrieve sort values for.
    * sort -- a string representing the type of sort to use.

    Returns a dictionary from integer cid to the value produced by
    _get_sort_value for that comment.

    """
    sorter = {}
    for comment in Comment._byID(cids, data=False, return_dict=False):
        sorter[comment._id] = _get_sort_value(comment, sort)
    return sorter
def _comment_sorter_from_cids(cids, sort):
    """Compute sort values for the comments with the given ids.

    Useful to fill in any gaps in CommentSortsCache.

    Arguments:

    * cids -- the ids of the Comments to look up.
    * sort -- a string representing the type of sort to use.

    Returns a dictionary from integer cid to a sort value.

    """
    fetched = Comment._byID(cids, data=False, return_dict=False)
    return {c._id: _get_sort_value(c, sort) for c in fetched}
def _populate(after_id=None, estimate=54301242):
    """Run update_comment_votes over all comments, newest first.

    Arguments:

    * after_id -- optional Comment id; when given, the query resumes after
      that comment.
    * estimate -- expected number of comments, passed to utils.progress.

    Comments without a link_id attribute are skipped.
    """
    from r2.models import desc
    from r2.lib.db import tdb_cassandra
    from r2.lib import utils

    # larger has a chance to decrease the number of Cassandra writes,
    # but the probability is low
    chunk_size = 5000

    def has_link(thing):
        return hasattr(thing, "link_id")

    # Include spam and deleted comments alike.
    query = Comment._query(
        Comment.c._spam == (True, False),
        Comment.c._deleted == (True, False),
        sort=desc("_date"),
    )
    if after_id is not None:
        query._after(Comment._byID(after_id))

    things = utils.fetch_things2(query, chunk_size=chunk_size)
    things = utils.progress(things, verbosity=chunk_size, estimate=estimate)

    for chunk in utils.in_chunks(things, chunk_size):
        update_comment_votes(filter(has_link, chunk))
def _populate(after_id=None, estimate=54301242):
    """Feed every comment, in descending date order, to update_comment_votes.

    Arguments:

    * after_id -- optional Comment id to resume after.
    * estimate -- expected total, used only for progress reporting.

    Work is done in chunks of 5000; comments lacking a link_id attribute
    are filtered out of each chunk first.
    """
    from r2.models import desc
    from r2.lib.db import tdb_cassandra
    from r2.lib import utils

    # larger has a chance to decrease the number of Cassandra writes,
    # but the probability is low
    chunk_size = 5000

    def _belongs_to_link(comment):
        return hasattr(comment, 'link_id')

    comment_query = Comment._query(Comment.c._spam == (True, False),
                                   Comment.c._deleted == (True, False),
                                   sort=desc('_date'))
    if after_id is not None:
        comment_query._after(Comment._byID(after_id))

    stream = utils.progress(
        utils.fetch_things2(comment_query, chunk_size=chunk_size),
        verbosity=chunk_size,
        estimate=estimate,
    )

    for chunk in utils.in_chunks(stream, chunk_size):
        linked = filter(_belongs_to_link, chunk)
        update_comment_votes(linked)
def get_comment_scores(link, sort, comment_ids, timer):
    """Retrieve sort values for the given comments on a post.

    Arguments:

    * link -- the Link containing the comments.
    * sort -- a string indicating the attribute on the comments to use for
      generating sort values.
    * comment_ids -- the integer ids of the comments that need a score.
    * timer -- a timer; its intermediate('sort') is called before
      returning.

    Returns a dictionary from integer cid to a sort value.

    """
    from r2.lib.db import queries
    from r2.models import CommentScoresByLink

    def _keyed_by_int(scores_by_id36):
        # we store these id36ed, but there are still bits of the code that
        # want to deal in integer IDs
        return {
            int(id36, 36): score
            for id36, score in scores_by_id36.iteritems()
        }

    if not comment_ids:
        # no comments means no scores
        return {}

    if sort == "_date":
        # comment ids are monotonically increasing, so we can use them as a
        # substitute for creation date
        scores_by_id = {cid: cid for cid in comment_ids}
    else:
        scores_by_id = _keyed_by_int(
            CommentScoresByLink.get_scores(link, sort))

    scores_needed = set(comment_ids) - set(scores_by_id)
    if scores_needed:
        # some scores were missing from CommentScoresByLink--lookup the
        # comments and calculate the scores.
        g.stats.simple_event('comment_tree_bad_sorter')

        missing_comments = Comment._byID(
            scores_needed, data=True, return_dict=False)

        if sort == "_qa":
            recomputed_id36 = _get_qa_comment_scores(link, missing_comments)
        else:
            recomputed_id36 = {
                comment._id36: getattr(comment, sort)
                for comment in missing_comments
            }
        recomputed = _keyed_by_int(recomputed_id36)

        # up to once per minute write the scores to limit writes but
        # eventually return us to the correct state.
        if not g.disallow_db_writes:
            write_key = "lock:score_{link}{sort}".format(
                link=link._id36,
                sort=sort,
            )
            should_write = g.lock_cache.add(write_key, "", time=60)
            if should_write:
                CommentScoresByLink.set_scores(link, sort, recomputed_id36)

        scores_by_id.update(recomputed)

    timer.intermediate('sort')

    return scores_by_id
def link_comments_and_sort(link, sort):
    """Fetch the comment tree for a link together with its sort values.

    Arguments:

    * link -- the Link whose comments are wanted.
    * sort -- a string naming the sort to load values for.

    Returns the tuple (cids, tree, depth, num_children, parents, sorter),
    where the first five come from the comment tree cache and sorter maps
    cid to a sort value.
    """
    from r2.models import CommentSortsCache

    # This has grown sort of organically over time. Right now the
    # cache of the comments tree consists in three keys:
    # 1. The comments_key: A tuple of
    #      (cids, comment_tree, depth, num_children)
    #    given:
    #      cids         =:= [comment_id]
    #      comment_tree =:= dict(comment_id -> [comment_id])
    #      depth        =:= dict(comment_id -> int depth)
    #      num_children =:= dict(comment_id -> int num_children)
    # 2. The parent_comments_key =:= dict(comment_id -> parent_id)
    # 3. The comments_sorts keys =:= dict(comment_id36 -> float).
    #    These are represented by a Cassandra model
    #    (CommentSortsCache) rather than a permacache key. One of
    #    these exists for each sort (hot, new, etc)

    timer = g.stats.get_timer('comment_tree.get.%s' %
                              link.comment_tree_version)
    timer.start()

    link_id = link._id
    cache = get_comment_tree(link, timer=timer)
    cids = cache.cids
    parents = cache.parents

    # load the sorter
    sorter = _get_comment_sorter(link_id, sort)
    if cids and not sorter:
        g.log.debug("comment_tree.py: sorter (%s) cache miss for Link %s"
                    % (sort, link_id))
        sorter = {}

    missing = [cid for cid in cids if cid not in sorter]
    if cids and missing:
        g.log.debug(
            "Error in comment_tree: sorter %r inconsistent (missing %d e.g. %r)"
            % (sort_comments_key(link_id, sort), len(missing), missing[:10]))
        if not g.disallow_db_writes:
            update_comment_votes(
                Comment._byID(missing, data=True, return_dict=False))

        sorter.update(_comment_sorter_from_cids(missing, sort))
        timer.intermediate('sort')

    if parents is None:
        g.log.debug("comment_tree.py: parents cache miss for Link %s"
                    % link_id)
        parents = {}
    elif cids and any(cid not in parents for cid in cids):
        g.log.debug("Error in comment_tree: parents inconsistent for Link %s"
                    % link_id)
        parents = {}

    if not parents and len(cids) > 0:
        with CommentTree.mutation_context(link):
            # reload under lock so the sorter and parents are consistent
            timer.intermediate('lock')
            cache = get_comment_tree(link, timer=timer)
            cache.parents = cache.parent_dict_from_tree(cache.tree)

    timer.stop()

    return (cache.cids, cache.tree, cache.depth, cache.num_children,
            cache.parents, sorter)
def _comment_sorter_from_cids(cids, sort):
    """Return a dict from integer comment id to its sort value.

    Arguments:

    * cids -- the ids of the Comments to look up.
    * sort -- a string representing the type of sort to use.
    """
    sort_values = {}
    for comment in Comment._byID(cids, data=False, return_dict=False):
        sort_values[comment._id] = _get_sort_value(comment, sort)
    return sort_values
def link_comments_and_sort(link, sort):
    """Fetch and sort the comments on a post.

    Arguments:

    * link -- the Link whose comments we want to sort.
    * sort -- a string indicating the attribute on the comments to use for
      generating sort values.

    Returns a tuple in the form (cids, cid_tree, depth, parents, sorter),
    where the values are as follows:

    * cids -- a list of the ids of all comments in the thread.
    * cid_tree -- a dictionary from parent cid to children cids.
    * depth -- a dictionary from cid to the depth that comment resides in
      the tree. A top-level comment has depth 0.
    * parents -- a dictionary from child cid to parent cid.
    * sorter -- a dictionary from cid to a numeric value to be used for
      sorting.

    """
    # This has grown sort of organically over time. Right now the
    # cache of the comments tree consists in three keys:
    # 1. The comments_key: A tuple of
    #      (cids, comment_tree, depth)
    #    given:
    #      cids         =:= [comment_id]
    #      comment_tree =:= dict(comment_id -> [comment_id])
    #      depth        =:= dict(comment_id -> int depth)
    # 2. The parent_comments_key =:= dict(comment_id -> parent_id)
    # 3. The comments_sorts keys =:= dict(comment_id36 -> float).
    #    These are represented by a Cassandra model
    #    (CommentScoresByLink) rather than a permacache key. One of
    #    these exists for each sort (hot, new, etc)

    timer = g.stats.get_timer('comment_tree.get.%s' %
                              link.comment_tree_version)
    timer.start()

    cache = get_comment_tree(link, timer=timer)
    cids = cache.cids
    tree = cache.tree

    # load the sorter
    sorter = _get_comment_sorter(link, sort)
    if cids and not sorter:
        g.log.debug("comment_tree.py: sorter %s cache miss for %s",
                    sort, link)
        sorter = {}

    # find comments for which the sort values weren't in the cache
    missing = [cid for cid in cids if cid not in sorter]
    if cids and missing:
        g.log.debug(
            "Error in comment_tree: sorter %s/%s inconsistent (missing %d e.g. %r)"
            % (link, sort, len(missing), missing[:10]))
        g.stats.simple_event('comment_tree_bad_sorter')
        if not g.disallow_db_writes:
            update_comment_votes(
                Comment._byID(missing, data=True, return_dict=False))

        # The Q&A sort needs access to attributes the others don't, so save
        # the extra lookups if we can.
        needs_data = (sort == '_qa')
        missing_comments = Comment._byID(missing, data=needs_data,
                                         return_dict=False)
        recomputed = _comment_sorter_from_cids(missing_comments, sort,
                                               link, tree)
        sorter.update(recomputed)
        timer.intermediate('sort')

    timer.stop()

    return (cache.cids, cache.tree, cache.depth, cache.parents, sorter)
def link_comments_and_sort(link, sort):
    """Fetch and sort the comments on a post.

    Arguments:

    * link -- the Link whose comments we want to sort.
    * sort -- a string indicating the attribute on the comments to use for
      generating sort values.

    Returns a tuple in the form (cids, cid_tree, depth, parents, sorter),
    where the values are as follows:

    * cids -- a list of the ids of all comments in the thread.
    * cid_tree -- a dictionary from parent cid to children cids.
    * depth -- a dictionary from cid to the depth that comment resides in
      the tree. A top-level comment has depth 0.
    * parents -- a dictionary from child cid to parent cid.
    * sorter -- a dictionary from cid to a numeric value to be used for
      sorting.

    """
    # This has grown sort of organically over time. Right now the
    # cache of the comments tree consists in three keys:
    # 1. The comments_key: A tuple of
    #      (cids, comment_tree, depth)
    #    given:
    #      cids         =:= [comment_id]
    #      comment_tree =:= dict(comment_id -> [comment_id])
    #      depth        =:= dict(comment_id -> int depth)
    # 2. The parent_comments_key =:= dict(comment_id -> parent_id)
    # 3. The comments_sorts keys =:= dict(comment_id36 -> float).
    #    These are represented by a Cassandra model
    #    (CommentScoresByLink) rather than a permacache key. One of
    #    these exists for each sort (hot, new, etc)

    timer = g.stats.get_timer('comment_tree.get.%s' %
                              link.comment_tree_version)
    timer.start()

    cache = get_comment_tree(link, timer=timer)
    cids = cache.cids
    tree = cache.tree
    parents = cache.parents

    # load the sorter
    sorter = _get_comment_sorter(link, sort)
    if cids and not sorter:
        g.log.debug("comment_tree.py: sorter %s cache miss for %s",
                    sort, link)
        sorter = {}

    # find comments for which the sort values weren't in the cache
    missing = [cid for cid in cids if cid not in sorter]
    if cids and missing:
        g.log.debug(
            "Error in comment_tree: sorter %s/%s inconsistent (missing %d e.g. %r)"
            % (link, sort, len(missing), missing[:10]))
        if not g.disallow_db_writes:
            update_comment_votes(
                Comment._byID(missing, data=True, return_dict=False))

        # The Q&A sort needs access to attributes the others don't, so save
        # the extra lookups if we can.
        needs_data = (sort == '_qa')
        missing_comments = Comment._byID(missing, data=needs_data,
                                         return_dict=False)
        sorter.update(
            _comment_sorter_from_cids(missing_comments, sort, link, tree))
        timer.intermediate('sort')

    if parents is None:
        g.log.debug("comment_tree.py: parents cache miss for %s", link)
        parents = {}
    elif cids and any(cid not in parents for cid in cids):
        g.log.debug("Error in comment_tree: parents inconsistent for %s",
                    link)
        parents = {}

    if not parents and len(cids) > 0:
        with CommentTree.mutation_context(link):
            # reload under lock so the sorter and parents are consistent
            timer.intermediate('lock')
            cache = get_comment_tree(link, timer=timer)
            cache.parents = cache.parent_dict_from_tree(cache.tree)

    timer.stop()

    return (cache.cids, cache.tree, cache.depth, cache.parents, sorter)
def get_comment_scores(link, sort, comment_ids, timer):
    """Retrieve sort values for the given comments on a post.

    Arguments:

    * link -- the Link containing the comments.
    * sort -- a string indicating the attribute on the comments to use for
      generating sort values.
    * comment_ids -- the integer ids of the comments that need a score.
    * timer -- a timer; its intermediate('sort') is called before
      returning.

    Returns a dictionary from integer cid to a sort value.

    """
    from r2.lib.db import queries
    from r2.models import CommentScoresByLink

    if not comment_ids:
        # no comments means no scores
        return {}

    if sort != "_date":
        stored = CommentScoresByLink.get_scores(link, sort)

        # we store these id36ed, but there are still bits of the code that
        # want to deal in integer IDs
        scores_by_id = {}
        for id36, score in stored.iteritems():
            scores_by_id[int(id36, 36)] = score
    else:
        # comment ids are monotonically increasing, so we can use them as a
        # substitute for creation date
        scores_by_id = dict((cid, cid) for cid in comment_ids)

    missing_ids = set(comment_ids).difference(scores_by_id)
    if missing_ids:
        g.stats.simple_event('comment_tree_bad_sorter')

        missing_comments = Comment._byID(missing_ids, data=True,
                                         return_dict=False)

        # queue the missing comments to be added to the comments tree, which
        # will trigger adding their scores
        for comment in missing_comments:
            queries.add_to_commentstree_q(comment)

        if sort == "_qa":
            qa_scores = _get_qa_comment_scores(link, missing_comments)
            recomputed = dict(
                (int(id36, 36), score)
                for id36, score in qa_scores.iteritems()
            )
        else:
            recomputed = dict(
                (comment._id, getattr(comment, sort))
                for comment in missing_comments
            )

        scores_by_id.update(recomputed)

    timer.intermediate('sort')

    return scores_by_id
def link_comments_and_sort(link, sort):
    """Load the cached comment tree for link plus sort values for sort.

    Arguments:

    * link -- the Link whose comments are wanted.
    * sort -- a string naming the sort to load values for.

    Returns (cids, tree, depth, num_children, parents, sorter): the first
    five are read from the comment tree cache, sorter maps cid to a sort
    value. Sort values missing from the cached sorter are recomputed, and
    the parents mapping is rebuilt under the tree's mutation lock when it
    is absent or does not cover every cid.
    """
    from r2.models import CommentSortsCache

    # This has grown sort of organically over time. Right now the
    # cache of the comments tree consists in three keys:
    # 1. The comments_key: A tuple of
    #      (cids, comment_tree, depth, num_children)
    #    given:
    #      cids         =:= [comment_id]
    #      comment_tree =:= dict(comment_id -> [comment_id])
    #      depth        =:= dict(comment_id -> int depth)
    #      num_children =:= dict(comment_id -> int num_children)
    # 2. The parent_comments_key =:= dict(comment_id -> parent_id)
    # 3. The comments_sorts keys =:= dict(comment_id36 -> float).
    #    These are represented by a Cassandra model
    #    (CommentSortsCache) rather than a permacache key. One of
    #    these exists for each sort (hot, new, etc)

    timer_name = 'comment_tree.get.%s' % link.comment_tree_version
    timer = g.stats.get_timer(timer_name)
    timer.start()

    link_id = link._id
    cache = get_comment_tree(link, timer=timer)
    cids = cache.cids
    parents = cache.parents

    # load the sorter
    sorter = _get_comment_sorter(link_id, sort)
    if cids and not sorter:
        g.log.debug("comment_tree.py: sorter (%s) cache miss for Link %s"
                    % (sort, link_id))
        sorter = {}

    unsorted_cids = [cid for cid in cids if cid not in sorter]
    if cids and unsorted_cids:
        g.log.debug(
            "Error in comment_tree: sorter %r inconsistent (missing %d e.g. %r)"
            % (sort_comments_key(link_id, sort),
               len(unsorted_cids),
               unsorted_cids[:10]))
        if not g.disallow_db_writes:
            stale = Comment._byID(unsorted_cids, data=True,
                                  return_dict=False)
            update_comment_votes(stale)

        sorter.update(_comment_sorter_from_cids(unsorted_cids, sort))
        timer.intermediate('sort')

    if parents is None:
        g.log.debug("comment_tree.py: parents cache miss for Link %s"
                    % link_id)
        parents = {}
    elif cids and any(cid not in parents for cid in cids):
        g.log.debug("Error in comment_tree: parents inconsistent for Link %s"
                    % link_id)
        parents = {}

    if not parents and len(cids) > 0:
        with CommentTree.mutation_context(link):
            # reload under lock so the sorter and parents are consistent
            timer.intermediate('lock')
            cache = get_comment_tree(link, timer=timer)
            cache.parents = cache.parent_dict_from_tree(cache.tree)

    timer.stop()

    return (cache.cids, cache.tree, cache.depth, cache.num_children,
            cache.parents, sorter)
def _comment_sorter_from_cids(cids, sort):
    """Map each of the given comment ids to its sort value.

    Arguments:

    * cids -- the ids of the Comments to look up.
    * sort -- a string representing the type of sort to use.

    Returns a dictionary from integer cid to a sort value.
    """
    fetched = Comment._byID(cids, data=False, return_dict=False)
    return {c._id: _get_sort_value(c, sort) for c in fetched}
def link_comments_and_sort(link, sort):
    """Fetch and sort the comments on a post.

    Arguments:

    * link -- the Link whose comments we want to sort.
    * sort -- a string indicating the attribute on the comments to use for
      generating sort values.

    Returns a tuple in the form (cids, cid_tree, depth, parents, sorter),
    where the values are as follows:

    * cids -- a list of the ids of all comments in the thread.
    * cid_tree -- a dictionary from parent cid to children cids.
    * depth -- a dictionary from cid to the depth that comment resides in
      the tree. A top-level comment has depth 0.
    * parents -- a dictionary from child cid to parent cid.
    * sorter -- a dictionary from cid to a numeric value to be used for
      sorting.

    """
    from r2.models import CommentSortsCache

    # This has grown sort of organically over time. Right now the
    # cache of the comments tree consists in three keys:
    # 1. The comments_key: A tuple of
    #      (cids, comment_tree, depth)
    #    given:
    #      cids         =:= [comment_id]
    #      comment_tree =:= dict(comment_id -> [comment_id])
    #      depth        =:= dict(comment_id -> int depth)
    # 2. The parent_comments_key =:= dict(comment_id -> parent_id)
    # 3. The comments_sorts keys =:= dict(comment_id36 -> float).
    #    These are represented by a Cassandra model
    #    (CommentSortsCache) rather than a permacache key. One of
    #    these exists for each sort (hot, new, etc)

    timer = g.stats.get_timer('comment_tree.get.%s' %
                              link.comment_tree_version)
    timer.start()

    link_id = link._id
    cache = get_comment_tree(link, timer=timer)
    cids = cache.cids
    tree = cache.tree
    parents = cache.parents

    # load the sorter
    sorter = _get_comment_sorter(link_id, sort)
    if cids and not sorter:
        g.log.debug("comment_tree.py: sorter (%s) cache miss for Link %s"
                    % (sort, link_id))
        sorter = {}

    # find comments for which the sort values weren't in the cache
    missing = [cid for cid in cids if cid not in sorter]
    if cids and missing:
        g.log.debug(
            "Error in comment_tree: sorter %r inconsistent (missing %d e.g. %r)"
            % (sort_comments_key(link_id, sort), len(missing), missing[:10]))
        if not g.disallow_db_writes:
            update_comment_votes(
                Comment._byID(missing, data=True, return_dict=False))

        missing_comments = Comment._byID(missing, data=False,
                                         return_dict=False)
        sorter.update(
            _comment_sorter_from_cids(missing_comments, sort, link, tree))
        timer.intermediate('sort')

    if parents is None:
        g.log.debug("comment_tree.py: parents cache miss for Link %s"
                    % link_id)
        parents = {}
    elif cids and any(cid not in parents for cid in cids):
        g.log.debug("Error in comment_tree: parents inconsistent for Link %s"
                    % link_id)
        parents = {}

    if not parents and len(cids) > 0:
        with CommentTree.mutation_context(link):
            # reload under lock so the sorter and parents are consistent
            timer.intermediate('lock')
            cache = get_comment_tree(link, timer=timer)
            cache.parents = cache.parent_dict_from_tree(cache.tree)

    timer.stop()

    return (cache.cids, cache.tree, cache.depth, cache.parents, sorter)
def get_comment_scores(link, sort, comment_ids, timer):
    """Retrieve sort values for the given comments on a post.

    Arguments:

    * link -- the Link containing the comments.
    * sort -- a string indicating the attribute on the comments to use for
      generating sort values.
    * comment_ids -- the integer ids of the comments that need a score.
    * timer -- a timer; its intermediate('sort') is called before
      returning.

    Returns a dictionary from integer cid to a sort value.

    """
    from r2.lib.db import queries
    from r2.models import CommentScoresByLink

    def _keyed_by_int(scores_by_id36):
        # we store these id36ed, but there are still bits of the code that
        # want to deal in integer IDs
        return {
            int(id36, 36): score
            for id36, score in scores_by_id36.iteritems()
        }

    if not comment_ids:
        # no comments means no scores
        return {}

    if sort == "_date":
        # comment ids are monotonically increasing, so we can use them as a
        # substitute for creation date
        scores_by_id = {cid: cid for cid in comment_ids}
    else:
        scores_by_id = _keyed_by_int(
            CommentScoresByLink.get_scores(link, sort))

    scores_needed = set(comment_ids) - set(scores_by_id)
    if scores_needed:
        g.stats.simple_event('comment_tree_bad_sorter')

        missing_comments = Comment._byID(
            scores_needed, data=True, return_dict=False)

        # queue the missing comments to be added to the comments tree, which
        # will trigger adding their scores
        for missing in missing_comments:
            queries.add_to_commentstree_q(missing)

        if sort == "_qa":
            recomputed = _keyed_by_int(
                _get_qa_comment_scores(link, missing_comments))
        else:
            recomputed = {
                missing._id: getattr(missing, sort)
                for missing in missing_comments
            }

        scores_by_id.update(recomputed)

    timer.intermediate('sort')

    return scores_by_id