def gen_keys():
    yield promoted_memo_key

    # just let this one do its own writing
    load_all_reddits()

    yield queries.get_all_comments().iden

    l_q = Link._query(Link.c._spam == (True, False),
                      Link.c._deleted == (True, False),
                      sort=desc('_date'),
                      data=True,
                      )
    for link in fetch_things2(l_q, verbosity):
        yield comments_key(link._id)
        yield last_modified_key(link, 'comments')

    a_q = Account._query(Account.c._spam == (True, False),
                         sort=desc('_date'),
                         )
    for account in fetch_things2(a_q, verbosity):
        yield messages_key(account._id)
        yield last_modified_key(account, 'overview')
        yield last_modified_key(account, 'commented')
        yield last_modified_key(account, 'submitted')
        yield last_modified_key(account, 'liked')
        yield last_modified_key(account, 'disliked')
        yield queries.get_comments(account, 'new', 'all').iden
        yield queries.get_submitted(account, 'new', 'all').iden
        yield queries.get_liked(account).iden
        yield queries.get_disliked(account).iden
        yield queries.get_hidden(account).iden
        yield queries.get_saved(account).iden
        yield queries.get_inbox_messages(account).iden
        yield queries.get_unread_messages(account).iden
        yield queries.get_inbox_comments(account).iden
        yield queries.get_unread_comments(account).iden
        yield queries.get_inbox_selfreply(account).iden
        yield queries.get_unread_selfreply(account).iden
        yield queries.get_sent(account).iden

    sr_q = Subreddit._query(Subreddit.c._spam == (True, False),
                            sort=desc('_date'),
                            )
    for sr in fetch_things2(sr_q, verbosity):
        yield last_modified_key(sr, 'stylesheet_contents')
        yield queries.get_links(sr, 'hot', 'all').iden
        yield queries.get_links(sr, 'new', 'all').iden

        for sort in 'top', 'controversial':
            for time in 'hour', 'day', 'week', 'month', 'year', 'all':
                yield queries.get_links(sr, sort, time,
                                        merge_batched=False).iden
        yield queries.get_spam_links(sr).iden
        yield queries.get_spam_comments(sr).iden
        yield queries.get_reported_links(sr).iden
        yield queries.get_reported_comments(sr).iden
        yield queries.get_subreddit_messages(sr).iden
        yield queries.get_unread_subreddit_messages(sr).iden
def gen_keys():
    yield promoted_memo_key

    # just let this one do its own writing
    load_all_reddits()

    yield queries.get_all_comments().iden

    l_q = Link._query(
        Link.c._spam == (True, False),
        Link.c._deleted == (True, False),
        sort=desc("_date"),
        data=True
    )
    for link in fetch_things2(l_q, verbosity):
        yield comments_key(link._id)
        yield last_modified_key(link, "comments")

    a_q = Account._query(Account.c._spam == (True, False), sort=desc("_date"))
    for account in fetch_things2(a_q, verbosity):
        yield messages_key(account._id)
        yield last_modified_key(account, "overview")
        yield last_modified_key(account, "commented")
        yield last_modified_key(account, "submitted")
        yield last_modified_key(account, "liked")
        yield last_modified_key(account, "disliked")
        yield queries.get_comments(account, "new", "all").iden
        yield queries.get_submitted(account, "new", "all").iden
        yield queries.get_liked(account).iden
        yield queries.get_disliked(account).iden
        yield queries.get_hidden(account).iden
        yield queries.get_saved(account).iden
        yield queries.get_inbox_messages(account).iden
        yield queries.get_unread_messages(account).iden
        yield queries.get_inbox_comments(account).iden
        yield queries.get_unread_comments(account).iden
        yield queries.get_inbox_selfreply(account).iden
        yield queries.get_unread_selfreply(account).iden
        yield queries.get_sent(account).iden

    sr_q = Subreddit._query(Subreddit.c._spam == (True, False), sort=desc("_date"))
    for sr in fetch_things2(sr_q, verbosity):
        yield last_modified_key(sr, "stylesheet_contents")
        yield queries.get_links(sr, "hot", "all").iden
        yield queries.get_links(sr, "new", "all").iden

        for sort in "top", "controversial":
            for time in "hour", "day", "week", "month", "year", "all":
                yield queries.get_links(sr, sort, time, merge_batched=False).iden
        yield queries.get_spam_links(sr).iden
        yield queries.get_spam_comments(sr).iden
        yield queries.get_reported_links(sr).iden
        yield queries.get_reported_comments(sr).iden
        yield queries.get_subreddit_messages(sr).iden
        yield queries.get_unread_subreddit_messages(sr).iden
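# A minimal driver sketch, not part of the original module: one way the keys
# produced by gen_keys() above could be consumed in fixed-size batches (for
# example to prefetch or rewrite the corresponding permacache entries). The
# chunk size and the process_batch callback are assumptions for illustration.
def drain_keys(process_batch, chunk_size=1000):
    batch = []
    for key in gen_keys():
        batch.append(key)
        if len(batch) >= chunk_size:
            process_batch(batch)
            batch = []
    if batch:
        process_batch(batch)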
def process_message(msgs, chan):
    """Update get_links(), the Links by Subreddit precomputed query.

    get_links() is a CachedResult which is stored in permacache. To
    update these objects we need to do a read-modify-write which
    requires obtaining a lock. Sharding these updates by subreddit
    allows us to run multiple consumers (but ideally just one per
    shard) to avoid lock contention.

    """

    from r2.lib.db.queries import add_queries, get_links

    link_names = {msg.body for msg in msgs}
    links = Link._by_fullname(link_names, return_dict=False)
    print 'Processing %r' % (links,)

    links_by_sr_id = defaultdict(list)
    for link in links:
        links_by_sr_id[link.sr_id].append(link)

    srs_by_id = Subreddit._byID(links_by_sr_id.keys(), stale=True)

    for sr_id, links in links_by_sr_id.iteritems():
        with g.stats.get_timer("link_vote_processor.subreddit_queries"):
            sr = srs_by_id[sr_id]
            add_queries(
                queries=[get_links(sr, sort, "all") for sort in SORTS],
                insert_items=links,
            )
def store_keys(key, maxes):
    # we're building queries from queries.py, but we could avoid this
    # by making the queries ourselves if we wanted to avoid the
    # individual lookups for accounts and subreddits
    userrel_fns = dict(liked=queries.get_liked,
                       disliked=queries.get_disliked,
                       saved=queries.get_saved,
                       hidden=queries.get_hidden)
    if key.startswith('user-'):
        acc_str, keytype, account_id = key.split('-')
        account_id = int(account_id)
        fn = queries.get_submitted if keytype == 'submitted' else queries.get_comments
        q = fn(Account._byID(account_id), 'new', 'all')
        insert_to_query(q, [(fname, float(timestamp))
                            for (timestamp, fname) in maxes])
    elif key.startswith('sr-'):
        sr_str, sort, time, sr_id = key.split('-')
        sr_id = int(sr_id)

        if sort == 'controversy':
            # I screwed this up in the mapper and it's too late to fix
            # it
            sort = 'controversial'

        q = queries.get_links(Subreddit._byID(sr_id), sort, time)
        insert_to_query(
            q,
            [tuple([item[-1]] + map(float, item[:-1])) for item in maxes])
    elif key.split('-')[0] in userrel_fns:
        key_type, account_id = key.split('-')
        account_id = int(account_id)
        fn = userrel_fns[key_type]
        q = fn(Account._byID(account_id))
        insert_to_query(
            q,
            [tuple([item[-1]] + map(float, item[:-1])) for item in maxes])
def process_message(msgs, chan):
    """Update get_links(), the Links by Subreddit precomputed query.

    get_links() is a CachedResult which is stored in permacache. To
    update these objects we need to do a read-modify-write which
    requires obtaining a lock. Sharding these updates by subreddit
    allows us to run multiple consumers (but ideally just one per
    shard) to avoid lock contention.

    """

    from r2.lib.db.queries import add_queries, get_links

    link_names = {msg.body for msg in msgs}
    links = Link._by_fullname(link_names, return_dict=False)
    print 'Processing %r' % (links,)

    links_by_sr_id = defaultdict(list)
    for link in links:
        links_by_sr_id[link.sr_id].append(link)

    srs_by_id = Subreddit._byID(links_by_sr_id.keys(), stale=True)

    for sr_id, links in links_by_sr_id.iteritems():
        with g.stats.get_timer("link_vote_processor.subreddit_queries"):
            sr = srs_by_id[sr_id]
            add_queries(
                queries=[get_links(sr, sort, "all") for sort in SORTS],
                insert_items=links,
            )
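# Illustrative only: the docstring above describes sharding these updates by
# subreddit so that each shard has (ideally) a single consumer. One simple way
# a producer could route vote messages is to hash the subreddit id onto a
# fixed number of queues; the shard count and queue-name pattern here are
# assumptions, not taken from the original code.
NUM_VOTE_SHARDS = 4

def queue_name_for_subreddit(sr_id):
    return "vote_link_q_%d" % (sr_id % NUM_VOTE_SHARDS)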
def get_links(self, sort, time, link_cls=None):
    from r2.lib.db import queries
    from r2.models import Link
    if not link_cls:
        link_cls = Link
    return queries.get_links(self, sort, time, link_cls)
def get_links_sr_ids(self, sr_ids, sort, time, link_cls=None):
    from r2.lib.db import queries
    from r2.models import Link
    if not link_cls:
        link_cls = Link

    if not sr_ids:
        srs = []
    else:
        srs = Subreddit._byID(sr_ids, return_dict=False)

    if g.use_query_cache:
        results = []
        for sr in srs:
            results.append(queries.get_links(sr, sort, time))
        return queries.merge_cached_results(*results)
    else:
        q = link_cls._query(link_cls.c.sr_id == sr_ids,
                            sort=queries.db_sort(sort))
        if sort == 'toplinks':
            q._filter(link_cls.c.top_link == True)
        elif sort == 'blessed':
            q._filter(link_cls.c.blessed == True)

        if time != 'all':
            q._filter(queries.db_times[time])
        return q
def store_keys(key, maxes):
    # we're building queries from queries.py, but we could avoid this
    # by making the queries ourselves if we wanted to avoid the
    # individual lookups for accounts and spaces
    userrel_fns = dict(
        liked=queries.get_liked,
        disliked=queries.get_disliked,
        saved=queries.get_saved,
        hidden=queries.get_hidden
    )
    if key.startswith("user-"):
        acc_str, keytype, account_id = key.split("-")
        account_id = int(account_id)
        fn = queries.get_submitted if keytype == "submitted" else queries.get_comments
        q = fn(Account._byID(account_id), "new", "all")
        insert_to_query(q, [(fname, float(timestamp)) for (timestamp, fname) in maxes])
    elif key.startswith("sr-"):
        sr_str, sort, time, sr_id = key.split("-")
        sr_id = int(sr_id)

        if sort == "controversy":
            # I screwed this up in the mapper and it's too late to fix
            # it
            sort = "controversial"

        q = queries.get_links(Subreddit._byID(sr_id), sort, time)
        insert_to_query(q, [tuple([item[-1]] + map(float, item[:-1])) for item in maxes])
    elif key.split("-")[0] in userrel_fns:
        key_type, account_id = key.split("-")
        account_id = int(account_id)
        fn = userrel_fns[key_type]
        q = fn(Account._byID(account_id))
        insert_to_query(q, [tuple([item[-1]] + map(float, item[:-1])) for item in maxes])
def get_links_sr_ids(self, sr_ids, sort, time, link_cls = None):
    from r2.lib.db import queries
    from r2.models import Link
    if not link_cls:
        link_cls = Link

    if not sr_ids:
        srs = []
    else:
        srs = Subreddit._byID(sr_ids, return_dict = False)

    if g.use_query_cache:
        results = []
        for sr in srs:
            results.append(queries.get_links(sr, sort, time))
        return queries.merge_cached_results(*results)
    else:
        q = link_cls._query(link_cls.c.sr_id == sr_ids,
                            sort = queries.db_sort(sort))
        if sort == 'toplinks':
            q._filter(link_cls.c.top_link == True)
        elif sort == 'blessed':
            q._filter(link_cls.c.blessed == True)

        if time != 'all':
            q._filter(queries.db_times[time])
        return q
def get_links(self, sort, time, link_cls = None):
    from r2.lib.db import queries
    from r2.models import Link
    if not link_cls:
        link_cls = Link
    return queries.get_links(self, sort, time, link_cls)
def get_links_sr_ids(self, sr_ids, sort, time):
    from r2.lib.db import queries

    if not sr_ids:
        return []
    else:
        srs = Subreddit._byID(sr_ids, data=True, return_dict=False)

    results = [queries.get_links(sr, sort, time) for sr in srs]
    return queries.merge_results(*results)
def test_get_links(self):
    from r2.lib.db import queries
    from r2.models import Subreddit, Account, Link, Thing

    account = Account._byID(1, data=True)
    sr = Subreddit._by_name("reddit_test0")

    link_url = self.make_unique_url()
    new_link = Link._submit("test_get_links", link_url, account, sr,
                            "127.0.0.1", kind="link")
    queries.new_link(new_link, foreground=True)

    res = Thing._by_fullname(queries.get_links(sr, "new", "all"),
                             return_dict=False)
    self.assert_true(len(res) > 0, "no links returned")
    self.assert_equal(new_link._id, res[0]._id)
def store_keys(key, maxes):
    # we're building queries using queries.py, but we could make the
    # queries ourselves if we wanted to avoid the individual lookups
    # for accounts and subreddits.

    # Note that we're only generating the 'sr-' type queries here, but
    # we're also able to process the other listings generated by the
    # old migrate.mr_permacache for convenience
    userrel_fns = dict(liked = queries.get_liked,
                       disliked = queries.get_disliked,
                       saved = queries.get_saved,
                       hidden = queries.get_hidden)
    if key.startswith('user-'):
        acc_str, keytype, account_id = key.split('-')
        account_id = int(account_id)
        fn = queries.get_submitted if keytype == 'submitted' else queries.get_comments
        q = fn(Account._byID(account_id), 'new', 'all')
        q._insert_tuples([(fname, float(timestamp))
                          for (timestamp, fname) in maxes])

    elif key.startswith('sr-'):
        sr_str, sort, time, sr_id = key.split('-')
        sr_id = int(sr_id)

        if sort == 'controversy':
            # I screwed this up in the mapper and it's too late to fix
            # it
            sort = 'controversial'

        q = queries.get_links(Subreddit._byID(sr_id), sort, time)
        q._insert_tuples([tuple([item[-1]] + map(float, item[:-1]))
                          for item in maxes])
    elif key.startswith('domain/'):
        d_str, sort, time, domain = key.split('/')
        q = queries.get_domain_links(domain, sort, time)
        q._insert_tuples([tuple([item[-1]] + map(float, item[:-1]))
                          for item in maxes])
    elif key.split('-')[0] in userrel_fns:
        key_type, account_id = key.split('-')
        account_id = int(account_id)
        fn = userrel_fns[key_type]
        q = fn(Account._byID(account_id))
        q._insert_tuples([tuple([item[-1]] + map(float, item[:-1]))
                          for item in maxes])
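# A self-contained sketch of the key formats that store_keys() above
# dispatches on; the sample values are hypothetical, not output from a real
# mapreduce job.
sample_keys = [
    'user-submitted-1234',          # -> get_submitted(account 1234, 'new', 'all')
    'sr-top-all-42',                # -> get_links(subreddit 42, 'top', 'all')
    'domain/new/all/example.com',   # -> get_domain_links('example.com', 'new', 'all')
    'liked-1234',                   # -> get_liked(account 1234)
]
for k in sample_keys:
    sep = '/' if k.startswith('domain/') else '-'
    print k, '->', k.split(sep)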
def get_recent_name_submissions():
    link_fullnames = list(queries.get_links(SERVERNAME_SR, "new", "all"))
    links = chain.from_iterable(Thing._by_fullname(chunk, return_dict=False)
                                for chunk in in_chunks(link_fullnames))

    for link in links:
        if link._deleted or link._spam:
            continue

        # OH GOD WHAT HAVE YOU POSTED IN MY LOVELY AUTOMATED SUBREDDIT!?
        if (not hasattr(link, "revenue_date") or
                not hasattr(link, "revenue_bucket") or
                not hasattr(link, "server_names")):
            continue

        yield link
def get_links_sr_ids(self, sr_ids, sort, time):
    from r2.lib.db import queries
    from r2.models import Link

    if not sr_ids:
        return []
    else:
        srs = Subreddit._byID(sr_ids, data=True, return_dict=False)

    if g.use_query_cache:
        results = [queries.get_links(sr, sort, time) for sr in srs]
        return queries.merge_results(*results)
    else:
        q = Link._query(Link.c.sr_id == sr_ids,
                        sort=queries.db_sort(sort), data=True)
        if time != "all":
            q._filter(queries.db_times[time])
        return q
def get_links_sr_ids(self, sr_ids, sort, time):
    from r2.lib.db import queries
    from r2.models import Link

    if not sr_ids:
        return []
    else:
        srs = Subreddit._byID(sr_ids, return_dict=False)

    if g.use_query_cache:
        results = [queries.get_links(sr, sort, time) for sr in srs]
        return queries.merge_results(*results)
    else:
        q = Link._query(Link.c.sr_id == sr_ids,
                        sort=queries.db_sort(sort))
        if time != 'all':
            q._filter(queries.db_times[time])
        return q
def test_get_files(self):
    from r2.lib.db import queries
    from r2.models import Subreddit, Account, Link, Thing

    account = Account._byID(1, data=True)
    sr = Subreddit._by_name("reddit_test0")

    link_url = self.make_unique_url()
    new_link = Link._submit("test_get_files", link_url, account, sr,
                            "127.0.0.1", kind="file")
    queries.new_link(new_link, foreground=True)

    # make sure it returns like a normal link
    res = Thing._by_fullname(queries.get_links(sr, "new", "all"),
                             return_dict=False)
    self.assert_true(len(res) > 0, "no links returned")
    self.assert_equal(new_link._id, res[0]._id)

    # should return with a kind = 'file' filter
    res = list(queries.get_files(sr))
    self.assert_true(len(res) > 0, "no links returned")
    self.assert_equal(new_link._id, res[0]._id)
def get_links_sr_ids(self, sr_ids, sort, time):
    from r2.lib.db import queries
    from r2.models import Link
    from r2.lib.normalized_hot import expand_children

    if not sr_ids:
        return []
    else:
        srs = Subsciteit._byID(sr_ids, data=True, return_dict=False)

    if g.use_query_cache:
        srs = expand_children(srs)
        results = [queries.get_links(sr, sort, time, no_children=True)
                   for sr in srs]
        return queries.merge_results(*results)
    else:
        sr_ids = expand_children(sr_ids, byID=True)
        q = Link._query(Link.c.sr_id == sr_ids,
                        sort=queries.db_sort(sort), data=True)
        if time != 'all':
            q._filter(queries.db_times[time])
        return q
def get_links(self, sort, time):
    from r2.lib.db import queries
    return queries.get_links(self, sort, time)