def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        # write scores before CommentTree because the scores must exist for
        # all comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')
                comment_tree.add_comments(link_comments)
                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # failed to add a comment to the CommentTree because its
                # parent is missing from the tree. this comment will be lost
                # forever unless a rebuild is performed.
                comment_ids = [comment._id for comment in link_comments]
                g.log.error(
                    "comment_tree_inconsistent: %s %s" % (link, comment_ids))
                g.stats.simple_event('comment_tree_inconsistent')
                return

            # do this under the same lock because we want to ensure we are
            # using the same version of the CommentTree as was just written
            write_comment_orders(link)
            timer.intermediate('write_order')

        timer.stop()
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer("comment_tree.add.1")
        timer.start()

        # write scores before CommentTree because the scores must exist for
        # all comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate("scores")

        CommentTree.add_comments(link, link_comments)
        timer.intermediate("update")

        write_comment_orders(link)
        timer.intermediate("write_order")

        timer.stop()
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links_by_id = Link._byID(link_ids)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links_by_id[link_id]

        timer = g.stats.get_timer('comment_tree.add.1')
        timer.start()

        write_comment_scores(link, link_comments)
        timer.intermediate('scores')

        CommentTree.add_comments(link, link_comments)
        timer.intermediate('update')

        write_comment_orders(link)
        timer.intermediate('write_order')

        timer.stop()
def update_score(obj, up_change, down_change, new_valid_thing, old_valid_thing):
    obj._incr('_ups', up_change)
    obj._incr('_downs', down_change)
    if isinstance(obj, Comment):
        if hasattr(obj, 'parent_id'):
            Comment._byID(obj.parent_id).incr_descendant_karma(
                [], up_change - down_change)
        Link._byID(obj.link_id)._incr(
            '_descendant_karma', up_change - down_change)
def calc_rising():
    # As far as I can tell this can only ever return a series of 0's, as that
    # is what is hard-coded in... in which case nothing should ever be rising
    # unless I explicitly make it so.
    sr_count = count.get_link_counts()
    link_count = dict((k, v[0]) for k, v in sr_count.iteritems())
    link_names = Link._by_fullname(sr_count.keys(), data=True)

    # max is half the average of the top 10 counts
    counts = link_count.values()
    counts.sort(reverse=True)
    maxcount = sum(counts[:10]) / (2. * min(10, len(counts)))

    # prune the list
    print link_count
    print link_names
    print maxcount
    rising = [(n, link_names[n].sr_id)
              for n in link_names.keys()
              if link_count[n] < maxcount]
    print rising

    cur_time = datetime.now(g.tz)

    def score(pair):
        name = pair[0]
        link = link_names[name]
        hours = (cur_time - link._date).seconds / 3600 + 1
        return float(link._ups) / (max(link_count[name], 1) * hours)

    def r(x):
        return 1 if x > 0 else -1 if x < 0 else 0

    rising.sort(lambda x, y: r(score(y) - score(x)))
    return rising
def calc_rising():
    sr_count = count.get_link_counts()
    link_count = dict((k, v[0]) for k, v in sr_count.iteritems())
    link_names = Link._by_fullname(sr_count.keys(), data=True)

    # max is half the average of the top 10 counts
    counts = link_count.values()
    counts.sort(reverse=True)
    maxcount = sum(counts[:10]) / 20

    # prune the list
    rising = [(n, link_names[n].sr_id)
              for n in link_names.keys()
              if link_count[n] < maxcount]

    cur_time = datetime.now(g.tz)

    def score(pair):
        name = pair[0]
        link = link_names[name]
        hours = (cur_time - link._date).seconds / 3600 + 1
        return float(link._ups) / (max(link_count[name], 1) * hours)

    def r(x):
        return 1 if x > 0 else -1 if x < 0 else 0

    rising.sort(lambda x, y: r(score(y) - score(x)))
    return rising
def add_comments(comments):
    links = Link._byID([com.link_id for com in tup(comments)], data=True)

    comments = tup(comments)
    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    for link_id, coms in link_map.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        try:
            with CommentTree.mutation_context(link):
                timer.intermediate('lock')
                cache = get_comment_tree(link, timer=timer)
                timer.intermediate('get')
                cache.add_comments(coms)
                timer.intermediate('update')
        except:
            g.log.exception(
                'add_comments_nolock failed for link %s, recomputing tree',
                link_id)
            # calculate it from scratch
            get_comment_tree(link, _update=True, timer=timer)

        timer.stop()
        update_comment_votes(coms)
def link_comments(link_id, _update=False):
    key = comments_key(link_id)

    r = g.permacache.get(key)

    if r and not _update:
        return r
    else:
        # This operation can take longer than most (note the inner
        # locks) better to time out request temporarily than to deal
        # with an inconsistent tree
        with g.make_lock(lock_key(link_id), timeout=180):
            r = _load_link_comments(link_id)
            # rebuild parent dict
            cids, cid_tree, depth, num_children, num_comments = r
            r = r[:-1]  # Remove num_comments from r; we don't need to cache it.
            g.permacache.set(parent_comments_key(link_id),
                             _parent_dict_from_tree(cid_tree))

            g.permacache.set(key, r)

        # update the link's comment count and schedule it for search
        # reindexing
        link = Link._byID(link_id, data=True)
        link.num_comments = num_comments
        link._commit()
        from r2.lib.db.queries import changed
        changed(link)

        return r
def add_comments(comments):
    links = Link._byID([com.link_id for com in tup(comments)], data=True)

    comments = tup(comments)
    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    for link_id, coms in link_map.iteritems():
        link = links[link_id]

        add_comments = [comment for comment in coms if not comment._deleted]
        delete_comments = (comment for comment in coms if comment._deleted)

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        try:
            with CommentTree.mutation_context(link, timeout=30):
                timer.intermediate('lock')
                cache = get_comment_tree(link, timer=timer)
                timer.intermediate('get')
                if add_comments:
                    cache.add_comments(add_comments)
                for comment in delete_comments:
                    cache.delete_comment(comment, link)
                timer.intermediate('update')
        except InconsistentCommentTreeError:
            comment_ids = [comment._id for comment in coms]
            g.log.exception(
                'add_comments_nolock failed for link %s %s, recomputing',
                link_id, comment_ids)
            rebuild_comment_tree(link, timer=timer)
            g.stats.simple_event('comment_tree_inconsistent')

        timer.stop()
        update_comment_votes(coms)
def GET_document(self):
    try:
        # no cookies on errors
        c.cookies.clear()

        code = request.GET.get('code', '')
        srname = request.GET.get('srname', '')
        takedown = request.GET.get('takedown', "")
        if srname:
            c.site = Subreddit._by_name(srname)
        if c.render_style not in self.allowed_render_styles:
            return str(code)
        elif takedown and code == '404':
            link = Link._by_fullname(takedown)
            return pages.TakedownPage(link).render()
        elif code == '403':
            return self.send403()
        elif code == '500':
            return redditbroke % rand_strings.sadmessages
        elif code == '503':
            c.response.status_code = 503
            c.response.headers['Retry-After'] = 1
            c.response.content = toofast
            return c.response
        elif code == '304':
            if request.GET.has_key('x-sup-id'):
                c.response.headers['x-sup-id'] = request.GET.get('x-sup-id')
            return c.response
        elif c.site:
            return self.send404()
        else:
            return "page not found"
    except:
        return handle_awful_failure("something really bad just happened.")
def update_comment_votes(comments, write_consistency_level=None):
    from r2.models import CommentSortsCache

    comments = tup(comments)

    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    all_links = Link._byID(link_map.keys(), data=True)

    comment_trees = {}
    for link in all_links.values():
        comment_trees[link._id] = get_comment_tree(link)

    for link_id, coms in link_map.iteritems():
        link = all_links[link_id]
        for sort in ("_controversy", "_hot", "_confidence", "_score",
                     "_date", "_qa"):
            cid_tree = comment_trees[link_id].tree
            sorter = _comment_sorter_from_cids(coms, sort, link, cid_tree,
                                               by_36=True)

            # Cassandra always uses the id36 instead of the integer
            # ID, so we'll map that first before sending it
            c_key = sort_comments_key(link_id, sort)
            CommentSortsCache._set_values(
                c_key, sorter,
                write_consistency_level=write_consistency_level)
def update_comment_votes(comments, write_consistency_level=None):
    from r2.models import CommentSortsCache, CommentScoresByLink

    comments = tup(comments)

    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    all_links = Link._byID(link_map.keys(), data=True)

    comment_trees = {}
    for link in all_links.values():
        comment_trees[link._id] = get_comment_tree(link)

    for link_id, coms in link_map.iteritems():
        link = all_links[link_id]
        for sort in ("_controversy", "_hot", "_confidence", "_score",
                     "_date", "_qa"):
            cid_tree = comment_trees[link_id].tree
            scores_by_comment = _comment_sorter_from_cids(
                coms, sort, link, cid_tree, by_36=True)

            # Cassandra always uses the id36 instead of the integer
            # ID, so we'll map that first before sending it
            c_key = sort_comments_key(link_id, sort)
            CommentSortsCache._set_values(
                c_key, scores_by_comment,
                write_consistency_level=write_consistency_level)

            CommentScoresByLink.set_scores(link, sort, scores_by_comment)
def process_new_links(period=media_period, force=False):
    """Fetches links from the last period and sets their media properties.

    If force is True, it will fetch properties for links even if the
    properties already exist.
    """
    links = Link._query(Link.c._date > timeago(period), sort=desc('_date'),
                        data=True)

    results = {}
    jobs = []
    for link in fetch_things2(links):
        if link.is_self or link.promoted:
            continue
        elif not force and (link.has_thumbnail or link.media_object):
            continue

        jobs.append(make_link_info_job(results, link, g.useragent))

    # send links to a queue
    wq = WorkQueue(jobs, num_workers=20, timeout=30)
    wq.start()
    wq.jobs.join()

    # when the queue is finished, do the db writes in this thread
    for link, info in results.items():
        update_link(link, info[0], info[1])
def update_comment_votes(comments):
    from r2.models import CommentScoresByLink

    comments = tup(comments)

    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    all_links = Link._byID(link_map.keys(), data=True)

    comment_trees = {}
    for link in all_links.values():
        comment_trees[link._id] = get_comment_tree(link)

    for link_id, coms in link_map.iteritems():
        link = all_links[link_id]
        for sort in ("_controversy", "_hot", "_confidence", "_score",
                     "_date", "_qa"):
            cid_tree = comment_trees[link_id].tree
            scores_by_comment = _comment_sorter_from_cids(
                coms, sort, link, cid_tree, by_36=True)

            CommentScoresByLink.set_scores(link, sort, scores_by_comment)
def add_comments(comments):
    links = Link._byID([com.link_id for com in tup(comments)], data=True)

    comments = tup(comments)
    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    for link_id, coms in link_map.iteritems():
        link = links[link_id]

        add_comments = [comment for comment in coms if not comment._deleted]
        delete_comments = (comment for comment in coms if comment._deleted)

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        try:
            with CommentTree.mutation_context(link, timeout=30):
                timer.intermediate('lock')
                cache = get_comment_tree(link, timer=timer)
                timer.intermediate('get')
                if add_comments:
                    cache.add_comments(add_comments)
                for comment in delete_comments:
                    cache.delete_comment(comment, link)
                timer.intermediate('update')
        except InconsistentCommentTreeError:
            comment_ids = [comment._id for comment in coms]
            g.log.exception(
                'add_comments_nolock failed for link %s %s, recomputing',
                link_id, comment_ids)
            rebuild_comment_tree(link, timer=timer)

        timer.stop()
        update_comment_votes(coms)
def link_comments(link_id, _update=False):
    key = comments_key(link_id)

    r = g.permacache.get(key)

    if r and not _update:
        return r
    else:
        # This operation can take longer than most (note the inner
        # locks) better to time out request temporarily than to deal
        # with an inconsistent tree
        with g.make_lock(lock_key(link_id), timeout=180):
            r = _load_link_comments(link_id)
            # rebuild parent dict
            cids, cid_tree, depth, num_children, num_comments = r
            r = r[:-1]  # Remove num_comments from r; we don't need to cache it.
            g.permacache.set(parent_comments_key(link_id),
                             _parent_dict_from_tree(cid_tree))

            g.permacache.set(key, r)

        # update the link's comment count and schedule it for search
        # reindexing
        link = Link._byID(link_id, data=True)
        link.num_comments = num_comments
        link._commit()
        from r2.lib.db.queries import changed
        changed(link)

        return r
def add_comments(comments):
    links = Link._byID([com.link_id for com in tup(comments)], data=True)

    comments = tup(comments)
    link_map = {}
    for com in comments:
        link_map.setdefault(com.link_id, []).append(com)

    for link_id, coms in link_map.iteritems():
        link = links[link_id]

        add_comments = [comment for comment in coms if not comment._deleted]
        delete_comments = (comment for comment in coms if comment._deleted)

        timer = g.stats.get_timer(
            "comment_tree.add.%s" % link.comment_tree_version)
        timer.start()

        try:
            with CommentTree.mutation_context(link):
                timer.intermediate("lock")
                cache = get_comment_tree(link, timer=timer)
                timer.intermediate("get")
                if add_comments:
                    cache.add_comments(add_comments)
                for comment in delete_comments:
                    cache.delete_comment(comment, link)
                timer.intermediate("update")
        except:
            g.log.exception(
                "add_comments_nolock failed for link %s, recomputing tree",
                link_id)
            # calculate it from scratch
            get_comment_tree(link, _update=True, timer=timer)

        timer.stop()
        update_comment_votes(coms)
def GET_document(self):
    try:
        c.errors = c.errors or ErrorSet()
        # clear cookies the old fashioned way
        c.cookies = Cookies()

        code = request.GET.get('code', '')
        try:
            code = int(code)
        except ValueError:
            code = 404
        srname = request.GET.get('srname', '')
        takedown = request.GET.get('takedown', "")

        # StatusBasedRedirect will override this anyway, but we need this
        # here for pagecache to see.
        response.status_int = code

        if srname:
            c.site = Subreddit._by_name(srname)

        if request.GET.has_key('allow_framing'):
            c.allow_framing = bool(request.GET['allow_framing'] == '1')

        if code in (204, 304):
            # NEVER return a content body on 204/304 or downstream
            # caches may become very confused.
            if request.GET.has_key('x-sup-id'):
                x_sup_id = request.GET.get('x-sup-id')

                if '\r\n' not in x_sup_id:
                    response.headers['x-sup-id'] = x_sup_id

            return ""
        elif c.render_style not in self.allowed_render_styles:
            return str(code)
        elif c.render_style in extensions.API_TYPES:
            data = request.environ.get('extra_error_data', {'error': code})
            if request.environ.get("WANT_RAW_JSON"):
                return scriptsafe_dumps(data)
            return websafe_json(json.dumps(data))
        elif takedown and code == 404:
            link = Link._by_fullname(takedown)
            return pages.TakedownPage(link).render()
        elif code == 403:
            return self.send403()
        elif code == 429:
            return self.send429()
        elif code == 500:
            randmin = {'admin': random.choice(self.admins)}
            failien_url = make_failien_url()
            sad_message = safemarkdown(rand_strings.sadmessages % randmin)
            return redditbroke % (failien_url, sad_message)
        elif code == 503:
            return self.send503()
        elif c.site:
            return self.send404()
        else:
            return "page not found"
    except Exception as e:
        return handle_awful_failure("ErrorController.GET_document: %r" % e)
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        # write scores before CommentTree because the scores must exist for
        # all comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')
                comment_tree.add_comments(link_comments)
                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # failed to add a comment to the CommentTree because its
                # parent is missing from the tree. this comment will be lost
                # forever unless a rebuild is performed.
                comment_ids = [comment._id for comment in link_comments]
                g.log.error(
                    "comment_tree_inconsistent: %s %s" % (link, comment_ids))
                g.stats.simple_event('comment_tree_inconsistent')
                return

            # do this under the same lock because we want to ensure we are
            # using the same version of the CommentTree as was just written
            write_comment_orders(link)
            timer.intermediate('write_order')

        timer.stop()
def GET_document(self):
    try:
        c.errors = c.errors or ErrorSet()
        # clear cookies the old fashioned way
        c.cookies = Cookies()

        code = request.GET.get("code", "")
        try:
            code = int(code)
        except ValueError:
            code = 404
        srname = request.GET.get("srname", "")
        takedown = request.GET.get("takedown", "")

        # StatusBasedRedirect will override this anyway, but we need this
        # here for pagecache to see.
        response.status_int = code

        if srname:
            c.site = Subreddit._by_name(srname)

        if code in (204, 304):
            # NEVER return a content body on 204/304 or downstream
            # caches may become very confused.
            if request.GET.has_key("x-sup-id"):
                x_sup_id = request.GET.get("x-sup-id")

                if "\r\n" not in x_sup_id:
                    response.headers["x-sup-id"] = x_sup_id

            return ""
        elif c.render_style not in self.allowed_render_styles:
            return str(code)
        elif c.render_style in extensions.API_TYPES:
            data = request.environ.get("extra_error_data", {"error": code})
            return websafe_json(json.dumps(data))
        elif takedown and code == 404:
            link = Link._by_fullname(takedown)
            return pages.TakedownPage(link).render()
        elif code == 403:
            return self.send403()
        elif code == 429:
            return self.send429()
        elif code == 500:
            randmin = {"admin": random.choice(self.admins)}
            failien_url = make_failien_url()
            return redditbroke % (failien_url,
                                  rand_strings.sadmessages % randmin)
        elif code == 503:
            return self.send503()
        elif c.site:
            return self.send404()
        else:
            return "page not found"
    except:
        return handle_awful_failure("something really bad just happened.")
def GET_document(self):
    try:
        # clear cookies the old fashioned way
        c.cookies = Cookies()

        code = request.GET.get('code', '')
        try:
            code = int(code)
        except ValueError:
            code = 404
        srname = request.GET.get('srname', '')
        takedown = request.GET.get('takedown', "")
        if srname:
            c.site = Subreddit._by_name(srname)
        if c.render_style not in self.allowed_render_styles:
            if code not in (204, 304):
                c.response.content = str(code)
            c.response.status_code = code
            return c.response
        elif c.render_style == "api":
            data = request.environ.get('extra_error_data', {'error': code})
            c.response.content = json.dumps(data)
            return c.response
        elif takedown and code == 404:
            link = Link._by_fullname(takedown)
            return pages.TakedownPage(link).render()
        elif code == 403:
            return self.send403()
        elif code == 429:
            return self.send429()
        elif code == 500:
            randmin = {'admin': rand.choice(self.admins)}
            failien_name = 'youbrokeit%d.png' % rand.randint(1, NUM_FAILIENS)
            failien_url = static(failien_name)
            return redditbroke % (failien_url,
                                  rand_strings.sadmessages % randmin)
        elif code == 503:
            return self.send503()
        elif code == 304:
            if request.GET.has_key('x-sup-id'):
                x_sup_id = request.GET.get('x-sup-id')
                if '\r\n' not in x_sup_id:
                    c.response.headers['x-sup-id'] = x_sup_id
            return c.response
        elif c.site:
            return self.send404()
        else:
            return "page not found"
    except:
        return handle_awful_failure("something really bad just happened.")
def process_link(msg):
    fname = msg.body
    link = Link._by_fullname(msg.body, data=True)

    try:
        TimeoutFunction(_set_media, 30)(link)
    except TimeoutFunctionException:
        print "Timed out on %s" % fname
    except KeyboardInterrupt:
        raise
    except:
        print "Error fetching %s" % fname
        print traceback.format_exc()
def process_link(msg):
    fname = msg.body
    link = Link._by_fullname(msg.body, data=True)

    try:
        TimeoutFunction(_set_media, 30)(embedly_services, link)
    except TimeoutFunctionException:
        print "Timed out on %s" % fname
    except KeyboardInterrupt:
        raise
    except:
        print "Error fetching %s" % fname
        print traceback.format_exc()
def GET_document(self):
    try:
        c.errors = c.errors or ErrorSet()
        # clear cookies the old fashioned way
        c.cookies = Cookies()

        code = request.GET.get('code', '')
        try:
            code = int(code)
        except ValueError:
            code = 404
        srname = request.GET.get('srname', '')
        takedown = request.GET.get('takedown', "")
        if srname:
            c.site = Subreddit._by_name(srname)
        if c.render_style not in self.allowed_render_styles:
            if code not in (204, 304):
                c.response.content = str(code)
            c.response.status_code = code
            return c.response
        elif c.render_style in extensions.API_TYPES:
            data = request.environ.get('extra_error_data', {'error': code})
            c.response.content = websafe_json(json.dumps(data))
            return c.response
        elif takedown and code == 404:
            link = Link._by_fullname(takedown)
            return pages.TakedownPage(link).render()
        elif code == 403:
            return self.send403()
        elif code == 429:
            return self.send429()
        elif code == 500:
            randmin = {'admin': rand.choice(self.admins)}
            failien_name = 'youbrokeit%d.png' % rand.randint(1, NUM_FAILIENS)
            failien_url = static(failien_name)
            return redditbroke % (failien_url,
                                  rand_strings.sadmessages % randmin)
        elif code == 503:
            return self.send503()
        elif code == 304:
            if request.GET.has_key('x-sup-id'):
                x_sup_id = request.GET.get('x-sup-id')
                if '\r\n' not in x_sup_id:
                    c.response.headers['x-sup-id'] = x_sup_id
            return c.response
        elif c.site:
            return self.send404()
        else:
            return "page not found"
    except:
        return handle_awful_failure("something really bad just happened.")
def GET_document(self):
    try:
        c.errors = c.errors or ErrorSet()
        # clear cookies the old fashioned way
        c.cookies = Cookies()

        code = request.GET.get('code', '')
        try:
            code = int(code)
        except ValueError:
            code = 404
        srname = request.GET.get('srname', '')
        takedown = request.GET.get('takedown', "")
        if srname:
            c.site = Subreddit._by_name(srname)
        if code in (204, 304):
            # NEVER return a content body on 204/304 or downstream
            # caches may become very confused.
            if request.GET.has_key('x-sup-id'):
                x_sup_id = request.GET.get('x-sup-id')
                if '\r\n' not in x_sup_id:
                    response.headers['x-sup-id'] = x_sup_id
            return ""
        elif c.render_style not in self.allowed_render_styles:
            return str(code)
        elif c.render_style in extensions.API_TYPES:
            data = request.environ.get('extra_error_data', {'error': code})
            return websafe_json(json.dumps(data))
        elif takedown and code == 404:
            link = Link._by_fullname(takedown)
            return pages.TakedownPage(link).render()
        elif code == 403:
            return self.send403()
        elif code == 429:
            return self.send429()
        elif code == 500:
            randmin = {'admin': random.choice(self.admins)}
            failien_url = make_failien_url()
            return redditbroke % (failien_url,
                                  rand_strings.sadmessages % randmin)
        elif code == 503:
            return self.send503()
        elif c.site:
            return self.send404()
        else:
            return "page not found"
    except:
        return handle_awful_failure("something really bad just happened.")
def delete_comment(comment):
    link = Link._byID(comment.link_id, data=True)
    timer = g.stats.get_timer(
        'comment_tree.delete.%s' % link.comment_tree_version)
    timer.start()

    with CommentTree.mutation_context(link):
        timer.intermediate('lock')
        cache = get_comment_tree(link)
        timer.intermediate('get')
        cache.delete_comment(comment, link)
        timer.intermediate('update')

    from r2.lib.db.queries import changed
    changed([link])
    timer.intermediate('changed')

    timer.stop()
def find_tz():
    q = Link._query(sort=desc('_hot'), limit=1)
    link = list(q)[0]

    t = tdb_sql.get_thing_table(Link._type_id)[0]
    s = sa.select([sa.func.hot(t.c.ups, t.c.downs, t.c.date), t.c.thing_id],
                  t.c.thing_id == link._id)

    db_hot = s.execute().fetchall()[0].hot.__float__()
    # round to the precision stored by the db before comparing
    db_hot = round(db_hot, 7)

    for tz_name in pytz.common_timezones:
        tz = pytz.timezone(tz_name)
        sorts.epoch = datetime(1970, 1, 1, tzinfo=tz)

        if db_hot == link._hot:
            print tz_name
def calc_rising():
    link_counts = count.get_link_counts()

    links = Link._by_fullname(link_counts.keys(), data=True)

    def score(link):
        count = link_counts[link._fullname][0]
        return float(link._ups) / max(count, 1)

    # build the rising list, excluding items having 1 or less upvotes
    rising = []
    for link in links.values():
        if link._ups > 1:
            rising.append((link._fullname, score(link), link.sr_id))

    # return rising sorted by score
    return sorted(rising, key=lambda x: x[1], reverse=True)
def GET_document(self):
    try:
        # clear cookies the old fashioned way
        c.cookies = Cookies()

        code = request.GET.get('code', '')
        try:
            code = int(code)
        except ValueError:
            code = 404
        srname = request.GET.get('srname', '')
        takedown = request.GET.get('takedown', "")
        if srname:
            c.site = Subsciteit._by_name(srname)
        if c.render_style not in self.allowed_render_styles:
            if code not in (204, 304):
                c.response.content = str(code)
            return c.response
        elif c.render_style == "api":
            c.response.content = "{error: %s}" % code
            return c.response
        elif takedown and code == 404:
            link = Link._by_fullname(takedown)
            return pages.TakedownPage(link).render()
        elif code == 403:
            return self.send403()
        elif code == 500:
            randmin = {'admin': rand.choice(self.admins)}
            failien_name = 'youbrokeit%d.png' % rand.randint(1, NUM_FAILIENS)
            failien_url = static(failien_name)
            return sciteitbroke % (failien_url,
                                   rand_strings.sadmessages % randmin)
        elif code == 503:
            return self.send503()
        elif code == 304:
            if request.GET.has_key('x-sup-id'):
                x_sup_id = request.GET.get('x-sup-id')
                if '\r\n' not in x_sup_id:
                    c.response.headers['x-sup-id'] = x_sup_id
            return c.response
        elif c.site:
            return self.send404()
        else:
            return "page not found"
    except:
        return handle_awful_failure("something really bad just happened.")
def find_tz():
    q = Link._query(sort=desc('_hot'), limit=1)
    link = list(q)[0]

    t = tdb_sql.get_thing_table(Link._type_id)[0]
    s = sa.select([sa.func.hot(t.c.ups, t.c.downs, t.c.date), t.c.thing_id],
                  t.c.thing_id == link._id)

    db_hot = s.execute().fetchall()[0].hot.__float__()
    # round to the precision stored by the db before comparing
    db_hot = round(db_hot, 7)

    for tz_name in pytz.common_timezones:
        tz = pytz.timezone(tz_name)
        sorts.epoch = datetime(1970, 1, 1, tzinfo=tz)

        if db_hot == link._hot:
            print tz_name
def GET_document(self):
    try:
        # clear cookies the old fashioned way
        c.cookies = Cookies()

        code = request.GET.get("code", "")
        try:
            code = int(code)
        except ValueError:
            code = 404
        srname = request.GET.get("srname", "")
        takedown = request.GET.get("takedown", "")
        if srname:
            c.site = Subreddit._by_name(srname)
        if c.render_style not in self.allowed_render_styles:
            if code not in (204, 304):
                c.response.content = str(code)
            return c.response
        elif c.render_style == "api":
            c.response.content = "{error: %s}" % code
            return c.response
        elif takedown and code == 404:
            link = Link._by_fullname(takedown)
            return pages.TakedownPage(link).render()
        elif code == 403:
            return self.send403()
        elif code == 500:
            return redditbroke % (rand.randint(1, NUM_FAILIENS),
                                  rand_strings.sadmessages)
        elif code == 503:
            return self.send503()
        elif code == 304:
            if request.GET.has_key("x-sup-id"):
                x_sup_id = request.GET.get("x-sup-id")
                if "\r\n" not in x_sup_id:
                    c.response.headers["x-sup-id"] = x_sup_id
            return c.response
        elif c.site:
            return self.send404()
        else:
            return "page not found"
    except:
        return handle_awful_failure("something really bad just happened.")
def GET_document(self):
    try:
        # clear cookies the old fashioned way
        c.cookies = Cookies()

        code = request.GET.get('code', '')
        try:
            code = int(code)
        except ValueError:
            code = 404
        srname = request.GET.get('srname', '')
        takedown = request.GET.get('takedown', "")
        if srname:
            c.site = Subreddit._by_name(srname)
        if c.render_style not in self.allowed_render_styles:
            if code not in (204, 304):
                c.response.content = str(code)
            return c.response
        elif c.render_style == "api":
            c.response.content = "{error: %s}" % code
            return c.response
        elif takedown and code == 404:
            link = Link._by_fullname(takedown)
            return pages.TakedownPage(link).render()
        elif code == 403:
            return self.send403()
        elif code == 500:
            return redditbroke % (rand.randint(1, NUM_FAILIENS),
                                  rand_strings.sadmessages)
        elif code == 503:
            return self.send503()
        elif code == 304:
            if request.GET.has_key('x-sup-id'):
                c.response.headers['x-sup-id'] = request.GET.get('x-sup-id')
            return c.response
        elif c.site:
            return self.send404()
        else:
            return "page not found"
    except:
        return handle_awful_failure("something really bad just happened.")
def delete_comment(comment):
    with g.make_lock(lock_key(comment.link_id)):
        cids, comment_tree, depth, num_children = link_comments(
            comment.link_id)

        # only completely remove comments with no children
        if comment._id not in comment_tree:
            if comment._id in cids:
                cids.remove(comment._id)
            if comment._id in depth:
                del depth[comment._id]
            if comment._id in num_children:
                del num_children[comment._id]
            g.permacache.set(comments_key(comment.link_id),
                             (cids, comment_tree, depth, num_children))

        # update the link's comment count and schedule it for search
        # reindexing
        link = Link._byID(comment.link_id, data=True)
        link._incr('num_comments', -1)
        from r2.lib.db.queries import changed
        changed(link)
def update_comment_votes(comments):
    from r2.models import CommentScoresByLink

    comments = tup(comments)

    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    links_by_id = Link._byID(comments_by_link_id.keys(), data=True)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links_by_id[link_id]

        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
def delete_comment(comment):
    with g.make_lock(lock_key(comment.link_id)):
        cids, comment_tree, depth, num_children = link_comments(
            comment.link_id)

        # only completely remove comments with no children
        if comment._id not in comment_tree:
            if comment._id in cids:
                cids.remove(comment._id)
            if comment._id in depth:
                del depth[comment._id]
            if comment._id in num_children:
                del num_children[comment._id]
            g.permacache.set(comments_key(comment.link_id),
                             (cids, comment_tree, depth, num_children))

        # update the link's comment count and schedule it for search
        # reindexing
        link = Link._byID(comment.link_id, data=True)
        link._incr('num_comments', -1)
        from r2.lib.db.queries import changed
        changed(link)
def GET_document(self):
    try:
        # clear cookies the old fashioned way
        c.cookies = Cookies()

        code = request.GET.get('code', '')
        srname = request.GET.get('srname', '')
        takedown = request.GET.get('takedown', "")
        if srname:
            c.site = Subreddit._by_name(srname)
        if c.render_style not in self.allowed_render_styles:
            c.response.content = str(int(code))
            return c.response
        elif c.render_style == "api":
            c.response.content = "{error: %s}" % code
            return c.response
        elif takedown and code == '404':
            link = Link._by_fullname(takedown)
            return pages.TakedownPage(link).render()
        elif code == '403':
            return self.send403()
        elif code == '500':
            return redditbroke % (rand.randint(1, NUM_FAILIENS),
                                  rand_strings.sadmessages)
        elif code == '503':
            return self.send503()
        elif code == '304':
            if request.GET.has_key('x-sup-id'):
                c.response.headers['x-sup-id'] = request.GET.get('x-sup-id')
            return c.response
        elif c.site:
            return self.send404()
        else:
            return "page not found"
    except:
        return handle_awful_failure("something really bad just happened.")
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        timer = g.stats.get_timer('comment_tree.add.1')
        timer.start()

        # write scores before CommentTree because the scores must exist for
        # all comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        CommentTree.add_comments(link, link_comments)
        timer.intermediate('update')

        write_comment_orders(link)
        timer.intermediate('write_order')

        timer.stop()
def process_new_links(period=media_period, force=False):
    """Fetches links from the last period and sets their media properties.

    If force is True, it will fetch properties for links even if the
    properties already exist.
    """
    links = Link._query(Link.c._date > timeago(period), sort=desc('_date'),
                        data=True)

    results = {}
    jobs = []
    for link in fetch_things2(links):
        if link.is_self or link.promoted:
            continue
        elif not force and (link.has_thumbnail or link.media_object):
            continue

        jobs.append(make_link_info_job(results, link, g.useragent))

    # send links to a queue
    wq = WorkQueue(jobs, num_workers=20, timeout=30)
    wq.start()
    wq.jobs.join()

    # when the queue is finished, do the db writes in this thread
    for link, info in results.items():
        update_link(link, info[0], info[1])
def GET_document(self):
    try:
        c.errors = c.errors or ErrorSet()
        # clear cookies the old fashioned way
        c.cookies = Cookies()

        code = request.GET.get('code', '')
        try:
            code = int(code)
        except ValueError:
            code = 404
        srname = request.GET.get('srname', '')
        takedown = request.GET.get('takedown', '')
        error_name = request.GET.get('error_name', '')

        if isinstance(c.user, basestring):
            # somehow requests are getting here with c.user unset
            c.user_is_loggedin = False
            c.user = UnloggedUser(browser_langs=None)

        if srname:
            c.site = Subreddit._by_name(srname)

        if request.GET.has_key('allow_framing'):
            c.allow_framing = bool(request.GET['allow_framing'] == '1')

        if (error_name == 'IN_TIMEOUT' and
                not 'usable_error_content' in request.environ):
            timeout_days_remaining = c.user.days_remaining_in_timeout
            errpage = pages.InterstitialPage(
                _("suspended"),
                content=pages.InTimeoutInterstitial(
                    timeout_days_remaining=timeout_days_remaining,
                ),
            )
            request.environ['usable_error_content'] = errpage.render()

        if code in (204, 304):
            # NEVER return a content body on 204/304 or downstream
            # caches may become very confused.
            return ""
        elif c.render_style not in self.allowed_render_styles:
            return str(code)
        elif c.render_style in extensions.API_TYPES:
            data = request.environ.get('extra_error_data', {'error': code})
            message = request.GET.get('message', '')
            if message:
                data['message'] = message
            if request.environ.get("WANT_RAW_JSON"):
                return scriptsafe_dumps(data)
            return websafe_json(json.dumps(data))
        elif takedown and code == 404:
            link = Link._by_fullname(takedown)
            return pages.TakedownPage(link).render()
        elif code == 400:
            return self.send400()
        elif code == 403:
            return self.send403()
        elif code == 429:
            return self.send429()
        elif code == 500:
            failien_url = make_failien_url()
            sad_message = get_funny_translated_string("500_page")
            sad_message %= {'admin': random.choice(self.admins)}
            sad_message = safemarkdown(sad_message)
            return redditbroke % (failien_url, sad_message)
        elif code == 503:
            return self.send503()
        elif c.site:
            return self.send404()
        else:
            return "page not found"
    except Exception as e:
        return handle_awful_failure("ErrorController.GET_document: %r" % e)
        print ' aborting - bad preview object: %s' % preview_object
        return False

    if not preview_object['url']:
        print ' aborting - bad preview url: %s' % preview_object['url']
        return False

    return True


s3 = boto.connect_s3(g.S3KEY_ID or None, g.S3SECRET_KEY or None)

for uid, columns in LinksByImage._cf.get_range():
    # When resuming, use:
    # for uid, columns in LinksByImage._cf.get_range(start='<uid>'):
    print 'Looking at image %s' % uid
    link_ids = columns.keys()
    links = Link._byID36(link_ids, return_dict=False, data=True)
    if not links:
        continue

    # Pull information about the image from the first link (they *should* all
    # be the same).
    link = links[0]
    preview_object = link.preview_object
    if not good_preview_object(preview_object):
        continue

    u = UrlParser(preview_object['url'])
    if preview_object['url'].startswith(g.media_fs_base_url_http):
        # Uploaded to the local filesystem instead of s3. Should only be in
        # dev.
        print ' non-s3 image'
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        new_comments = [
            comment for comment in link_comments if not comment._deleted]
        deleted_comments = [
            comment for comment in link_comments if comment._deleted]

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        # write scores before CommentTree because the scores must exist for
        # all comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')
                if new_comments:
                    comment_tree.add_comments(new_comments)
                for comment in deleted_comments:
                    comment_tree.delete_comment(comment, link)
                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # this exception occurs when we add a comment to the tree but
                # its parent isn't in the tree yet, need to rebuild the tree
                # from scratch
                comment_ids = [comment._id for comment in link_comments]
                g.log.exception(
                    'add_comments_nolock failed for link %s %s, recomputing',
                    link_id, comment_ids)

                comment_tree = CommentTree.rebuild(link)
                timer.intermediate('rebuild')

                # the tree rebuild updated the link's comment count, so
                # schedule it for search reindexing
                link.update_search_index()
                timer.intermediate('update_search_index')

                g.stats.simple_event('comment_tree_inconsistent')

            # do this under the same lock because we want to ensure we are
            # using the same version of the CommentTree as was just written
            write_comment_orders(link)
            timer.intermediate('write_order')

        timer.stop()
def _process_link(fname):
    link = Link._by_fullname(fname, data=True, return_dict=False)
    set_media(link)
def _process_link(fname):
    link = Link._by_fullname(fname, data=True)
    set_media(link)
def process_message(msg):
    fname = msg.body
    link = Link._by_fullname(fname, data=True)
    extract_keywords(link)
def add_comments(comments):
    """Add comments to the CommentTree and update scores."""
    from r2.models.builder import write_comment_orders

    link_ids = [comment.link_id for comment in tup(comments)]
    links = Link._byID(link_ids, data=True)

    comments = tup(comments)
    comments_by_link_id = defaultdict(list)
    for comment in comments:
        comments_by_link_id[comment.link_id].append(comment)

    for link_id, link_comments in comments_by_link_id.iteritems():
        link = links[link_id]

        new_comments = [
            comment for comment in link_comments if not comment._deleted]
        deleted_comments = [
            comment for comment in link_comments if comment._deleted]

        timer = g.stats.get_timer(
            'comment_tree.add.%s' % link.comment_tree_version)
        timer.start()

        # write scores before CommentTree because the scores must exist for
        # all comments in the tree
        for sort in ("_controversy", "_confidence", "_score"):
            scores_by_comment = {
                comment._id36: getattr(comment, sort)
                for comment in link_comments
            }
            CommentScoresByLink.set_scores(link, sort, scores_by_comment)

        scores_by_comment = _get_qa_comment_scores(link, link_comments)
        CommentScoresByLink.set_scores(link, "_qa", scores_by_comment)
        timer.intermediate('scores')

        with CommentTree.mutation_context(link, timeout=180):
            try:
                timer.intermediate('lock')
                comment_tree = CommentTree.by_link(link, timer)
                timer.intermediate('get')
                if new_comments:
                    comment_tree.add_comments(new_comments)
                for comment in deleted_comments:
                    comment_tree.delete_comment(comment, link)
                timer.intermediate('update')
            except InconsistentCommentTreeError:
                # this exception occurs when we add a comment to the tree but
                # its parent isn't in the tree yet, need to rebuild the tree
                # from scratch
                comment_ids = [comment._id for comment in link_comments]
                g.log.exception(
                    'add_comments_nolock failed for link %s %s, recomputing',
                    link_id, comment_ids)

                comment_tree = CommentTree.rebuild(link)
                timer.intermediate('rebuild')

                # the tree rebuild updated the link's comment count, so
                # schedule it for search reindexing
                link.update_search_index()
                timer.intermediate('update_search_index')

                g.stats.simple_event('comment_tree_inconsistent')

            # do this under the same lock because we want to ensure we are
            # using the same version of the CommentTree as was just written
            write_comment_orders(link, timer)
            timer.intermediate('write_order')

        timer.stop()