def time_listings(times = ('year','month','week','day','hour')):
    oldests = dict((t, epoch_seconds(timeago('1 %s' % t)))
                   for t in times)

    @mr_tools.dataspec_m_thing(("url", str),
                               ('sr_id', int),)
    def process(link):
        assert link.thing_type == 'link'

        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        if not link.spam and not link.deleted:
            sr_id = link.sr_id
            if link.url:
                domains = UrlParser(link.url).domain_permutations()
            else:
                domains = []
            ups, downs = link.ups, link.downs

            for tkey, oldest in oldests.iteritems():
                if timestamp > oldest:
                    sc = score(ups, downs)
                    contr = controversy(ups, downs)
                    yield ('sr-top-%s-%d' % (tkey, sr_id),
                           sc, timestamp, fname)
                    yield ('sr-controversial-%s-%d' % (tkey, sr_id),
                           contr, timestamp, fname)
                    for domain in domains:
                        yield ('domain/top/%s/%s' % (tkey, domain),
                               sc, timestamp, fname)
                        yield ('domain/controversial/%s/%s' % (tkey, domain),
                               contr, timestamp, fname)

    mr_tools.mr_map(process)

def normalized_hot_cached(sr_ids):
    results = []
    srs = Subreddit._byID(sr_ids, data = True, return_dict = False)
    for sr in srs:
        #items = get_hot(sr)
        items = filter(lambda l: l._date > utils.timeago('%d day' % g.HOT_PAGE_AGE),
                       get_hot(sr))

        if not items:
            continue

        top_score = max(items[0]._hot, 1)

        top, rest = items[:2], items[2:]

        if top:
            normals = [l._hot / top_score for l in top]
            results.extend((l, random.choice(normals)) for l in top)
            #random.shuffle(normals)
            #results.extend((l, normals.pop()) for l in top)

        if rest:
            results.extend((l, l._hot / top_score) for l in rest)

    results.sort(key = lambda x: (x[1], x[0]._hot), reverse = True)
    return [l[0]._fullname for l in results]

def process_new_links(period=media_period, force=False):
    """Fetches links from the last period and sets their media
    properties. If force is True, it will fetch properties for links
    even if the properties already exist."""
    links = Link._query(Link.c._date > timeago(period), sort=desc('_date'),
                        data=True)

    results = {}
    jobs = []
    for link in fetch_things2(links):
        if link.is_self or link.promoted:
            continue
        elif not force and (link.has_thumbnail or link.media_object):
            continue

        jobs.append(make_link_info_job(results, link, g.useragent))

    #send links to a queue
    wq = WorkQueue(jobs, num_workers=20, timeout=30)
    wq.start()
    wq.jobs.join()

    #when the queue is finished, do the db writes in this thread
    for link, info in results.items():
        update_link(link, info[0], info[1])

def user_vote_change_links(period = '1 day'):
    rel = Vote.rel(Account, Link)
    type = tdb.rel_types_id[rel._type_id]
    # rt = rel table
    # dt = data table
    rt, account_tt, link_tt, dt = type.rel_table

    aliases = tdb.alias_generator()
    author_dt = dt.alias(aliases.next())
    link_dt = tdb.types_id[Link._type_id].data_table[0].alias(aliases.next())

    # Create an SQL CASE statement for the subreddit vote multiplier
    cases = []
    for subreddit in subreddits_with_custom_karma_multiplier():
        cases.append( (sa.cast(link_dt.c.value, sa.Integer) == subreddit._id,
                       subreddit.post_karma_multiplier) )
    cases.append( (True, g.post_karma_multiplier) ) # The default article multiplier

    date = utils.timeago(period)

    s = sa.select([author_dt.c.value,
                   sa.func.sum(sa.cast(rt.c.name, sa.Integer) * sa.case(cases))],
                  sa.and_(rt.c.date >= date,
                          author_dt.c.thing_id == rt.c.rel_id,
                          author_dt.c.key == 'author_id',
                          link_tt.c.thing_id == rt.c.thing2_id,
                          link_tt.c.date >= date,
                          link_dt.c.key == 'sr_id',
                          link_dt.c.thing_id == rt.c.thing2_id),
                  group_by = author_dt.c.value)

    rows = s.execute().fetchall()
    return [(int(r.value), r.sum) for r in rows]

def time_listings(times = ('year','month','week','day','hour', 'all')):
    oldests = dict((t, epoch_seconds(timeago('1 %s' % t)))
                   for t in times
                   if t != 'all')
    if 'all' in times:
        oldests['all'] = 0

    @mr_tools.dataspec_m_thing(('author_id', int),)
    def process(link):
        assert link.thing_type == 'link'

        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        if not link.spam and not link.deleted:
            author_id = link.author_id
            ups, downs = link.ups, link.downs

            sc = score(ups, downs)
            contr = controversy(ups, downs)
            h = _hot(ups, downs, timestamp)

            for tkey, oldest in oldests.iteritems():
                if timestamp > oldest:
                    yield ('user-top-%s-%d' % (tkey, author_id),
                           sc, timestamp, fname)
                    yield ('user-controversial-%s-%d' % (tkey, author_id),
                           contr, timestamp, fname)
                    if tkey == 'all':
                        yield ('user-new-%s-%d' % (tkey, author_id),
                               timestamp, timestamp, fname)
                        yield ('user-hot-%s-%d' % (tkey, author_id),
                               h, timestamp, fname)

    mr_tools.mr_map(process)

def get_hot(srs, only_fullnames=False):
    """Get the (fullname, hotness, epoch_seconds) for the hottest
       links in a subreddit. Use the query-cache to avoid some lookups
       if we can."""
    from r2.lib.db.thing import Query
    from r2.lib.db.queries import CachedResults

    ret = []

    queries = [sr.get_links("hot", "all") for sr in srs]

    # fetch these all in one go
    cachedresults = filter(lambda q: isinstance(q, CachedResults), queries)
    CachedResults.fetch_multi(cachedresults)

    # pair each query with the subreddit it came from so the Query
    # branch below looks at the right sr
    for sr, q in zip(srs, queries):
        if isinstance(q, Query):
            links = cached_query(q, sr)
            res = [(link._fullname, link._hot, epoch_seconds(link._date))
                   for link in links]
        elif isinstance(q, CachedResults):
            # we're relying on an implementation detail of
            # CachedResults here, where it's storing tuples that look
            # exactly like the return-type we want, to make our
            # sorting a bit cheaper
            res = list(q.data)

        # remove any that are too old
        age_limit = epoch_seconds(utils.timeago("%d days" % g.HOT_PAGE_AGE))
        res = [(fname if only_fullnames else (fname, hot, date))
               for (fname, hot, date) in res
               if date > age_limit]
        ret.append(res)

    return ret

def time_listings(times=('year', 'month', 'week', 'day', 'hour', 'all')):
    oldests = dict(
        (t, epoch_seconds(timeago('1 %s' % t))) for t in times if t != 'all')
    if 'all' in times:
        oldests['all'] = 0

    @mr_tools.dataspec_m_thing(
        ('author_id', int),
    )
    def process(link):
        assert link.thing_type == 'link'

        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        if not link.spam and not link.deleted:
            author_id = link.author_id
            ups, downs = link.ups, link.downs

            sc = score(ups, downs)
            contr = controversy(ups, downs)
            h = _hot(ups, downs, timestamp)

            for tkey, oldest in oldests.iteritems():
                if timestamp > oldest:
                    yield ('user-top-%s-%d' % (tkey, author_id),
                           sc, timestamp, fname)
                    yield ('user-controversial-%s-%d' % (tkey, author_id),
                           contr, timestamp, fname)
                    if tkey == 'all':
                        yield ('user-new-%s-%d' % (tkey, author_id),
                               timestamp, timestamp, fname)
                        yield ('user-hot-%s-%d' % (tkey, author_id),
                               h, timestamp, fname)

    mr_tools.mr_map(process)

def get_hot(srs, only_fullnames=False):
    """Get the (fullname, hotness, epoch_seconds) for the hottest
       links in a subreddit. Use the query-cache to avoid some lookups
       if we can."""
    from r2.lib.db.thing import Query
    from r2.lib.db.queries import CachedResults

    ret = []

    queries = [sr.get_links('hot', 'all') for sr in srs]

    # fetch these all in one go
    cachedresults = filter(lambda q: isinstance(q, CachedResults), queries)
    CachedResults.fetch_multi(cachedresults)

    # pair each query with the subreddit it came from so the Query
    # branch below looks at the right sr
    for sr, q in zip(srs, queries):
        if isinstance(q, Query):
            links = cached_query(q, sr)
            res = [(link._fullname, link._hot, epoch_seconds(link._date))
                   for link in links]
        elif isinstance(q, CachedResults):
            # we're relying on an implementation detail of
            # CachedResults here, where it's storing tuples that look
            # exactly like the return-type we want, to make our
            # sorting a bit cheaper
            res = list(q.data)

        # remove any that are too old
        age_limit = epoch_seconds(utils.timeago('%d days' % g.HOT_PAGE_AGE))
        res = [(fname if only_fullnames else (fname, hot, date))
               for (fname, hot, date) in res
               if date > age_limit]
        ret.append(res)

    return ret

def add_props(cls, user, wrapped):
    from r2.lib.count import incr_counts
    from r2.lib.media import thumbnail_url
    from r2.lib.utils import timeago

    saved = Link._saved(user, wrapped) if user else {}
    hidden = Link._hidden(user, wrapped) if user else {}
    #clicked = Link._clicked(user, wrapped) if user else {}
    clicked = {}

    for item in wrapped:
        show_media = (c.user.pref_media == 'on' or
                      (item.promoted and item.has_thumbnail
                       and c.user.pref_media != 'off') or
                      (c.user.pref_media == 'subreddit' and
                       item.subreddit.show_media))

        if not show_media:
            item.thumbnail = ""
        elif item.has_thumbnail:
            item.thumbnail = thumbnail_url(item)
        else:
            item.thumbnail = g.default_thumb

        item.score = max(0, item.score)

        item.domain = (domain(item.url) if not item.is_self
                       else 'self.' + item.subreddit.name)

        if not hasattr(item, 'top_link'):
            item.top_link = False
        item.urlprefix = ''
        item.saved = bool(saved.get((user, item, 'save')))
        item.hidden = bool(hidden.get((user, item, 'hide')))
        item.clicked = bool(clicked.get((user, item, 'click')))
        item.num = None
        item.score_fmt = Score.number_only
        item.permalink = item.make_permalink(item.subreddit)
        if item.is_self:
            item.url = item.make_permalink(item.subreddit, force_domain = True)

        if c.user_is_admin:
            item.hide_score = False
        elif item.promoted:
            item.hide_score = True
        elif c.user == item.author:
            item.hide_score = False
        elif item._date > timeago("2 hours"):
            item.hide_score = True
        else:
            item.hide_score = False

        if c.user_is_loggedin and item.author._id == c.user._id:
            item.nofollow = False
        elif item.score <= 1 or item._spam or item.author._spam:
            item.nofollow = True
        else:
            item.nofollow = False

    if c.user_is_loggedin:
        incr_counts(wrapped)

def share(link, emails, from_name = "", reply_to = "", body = ""):
    """Queues a 'share link' email."""
    now = datetime.datetime.now(g.tz)
    ival = now - timeago(g.new_link_share_delay)
    date = max(now, link._date + ival)
    Email.handler.add_to_queue(c.user, link, emails, from_name, g.share_reply,
                               date, request.ip, Email.Kind.SHARE,
                               body = body, reply_to = reply_to)

def simplified_timesince(date, include_tense=True):
    if date > timeago("1 minute"):
        return _("just now")

    since = []
    since.append(timesince(date))
    if include_tense:
        since.append(_("ago"))
    return " ".join(since)

def simplified_timesince(date, include_tense=True):
    if date > timeago("1 minute"):
        return _("just now")

    since = timesince(date)
    if include_tense:
        return _("%s ago") % since
    else:
        return since

def fix_all_broken_things(delete=False):
    from r2.models import Link, Comment
    # 2009-07-21 is the first broken thing at the time of writing.
    from_time = datetime.datetime(2009, 7, 21, tzinfo=g.tz)
    to_time = utils.timeago("60 seconds")

    for (cls, attrs) in ((Link, ("author_id", "sr_id")),
                         (Comment, ("author_id", "sr_id", "body", "link_id"))):
        utils.find_broken_things(cls, attrs, from_time, to_time,
                                 delete=delete)

def share(link, emails, from_name = "", reply_to = "", body = ""):
    """Queues a 'share link' email."""
    now = datetime.datetime.now(g.tz)
    ival = now - timeago(g.new_link_share_delay)
    date = max(now, link._date + ival)
    Email.handler.add_to_queue(c.user, emails, from_name, g.share_reply,
                               Email.Kind.SHARE, date = date,
                               body = body, reply_to = reply_to,
                               thing = link)

def _get_cutoffs(intervals):
    cutoffs = {}
    for interval in intervals:
        if interval == "all":
            cutoffs["all"] = 0.0
        else:
            cutoffs[interval] = epoch_seconds(timeago("1 %s" % interval))

    return cutoffs

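# A minimal, self-contained sketch of the cutoff computation in _get_cutoffs()
# above, using only the standard library. timeago() and epoch_seconds() are r2
# helpers; the constants and names below are illustrative stand-ins, not the
# real implementation.
import time

_INTERVAL_SECONDS = {
    "hour": 3600,
    "day": 86400,
    "week": 7 * 86400,
    "month": 30 * 86400,
    "year": 365 * 86400,
}

def get_cutoffs_sketch(intervals, now=None):
    """Return {interval: earliest allowed UNIX timestamp}."""
    now = time.time() if now is None else now
    cutoffs = {}
    for interval in intervals:
        if interval == "all":
            # "all" means no lower bound on age
            cutoffs["all"] = 0.0
        else:
            cutoffs[interval] = now - _INTERVAL_SECONDS[interval]
    return cutoffs

# e.g. get_cutoffs_sketch(["day", "all"]) -> {"day": now - 86400, "all": 0.0}
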
def fix_all_broken_things(delete=False):
    from r2.models import Link, Comment
    # 2009-07-21 is the first broken thing at the time of writing.
    from_time = datetime.datetime(2009, 7, 21, tzinfo=g.tz)
    to_time = utils.timeago('60 seconds')

    for (cls, attrs) in ((Link, ('author_id', 'sr_id')),
                         (Comment, ('author_id', 'sr_id', 'body', 'link_id'))):
        utils.find_broken_things(cls, attrs, from_time, to_time,
                                 delete=delete)

def keep_fn(self):
    """For merged time-listings, don't show items that are too old
       (this can happen when mr_top hasn't run in a while)"""
    if self.time != 'all' and c.default_sr:
        oldest = timeago('1 %s' % (str(self.time),))
        def keep(item):
            return item._date > oldest and item.keep_item(item)
        return keep
    else:
        return ListingController.keep_fn(self)

def time_listings(times=('all', )):
    oldests = dict(
        (t, epoch_seconds(timeago('1 %s' % t))) for t in times if t != "all")
    oldests['all'] = epoch_seconds(timeago('10 years'))

    @mr_tools.dataspec_m_thing(
        ("url", str),
    )
    def process(link):
        assert link.thing_type == 'link'

        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        if not link.spam and not link.deleted:
            if link.url:
                domains = UrlParser(link.url).domain_permutations()
            else:
                domains = []
            ups, downs = link.ups, link.downs

            for tkey, oldest in oldests.iteritems():
                if timestamp > oldest:
                    sc = score(ups, downs)
                    contr = controversy(ups, downs)
                    h = _hot(ups, downs, timestamp)
                    # use a distinct name so the upvotes() helper isn't
                    # shadowed on later iterations of this loop
                    upvote_count = upvotes(ups)

                    for domain in domains:
                        yield ('domain/top/%s/%s' % (tkey, domain),
                               sc, timestamp, fname)
                        yield ('domain/%s/%s/%s' %
                               (g.voting_upvote_path, tkey, domain),
                               upvote_count, timestamp, fname)
                        yield ('domain/%s/%s/%s' %
                               (g.voting_controversial_path, tkey, domain),
                               contr, timestamp, fname)
                        if tkey == "all":
                            yield ('domain/hot/%s/%s' % (tkey, domain),
                                   h, timestamp, fname)
                            yield ('domain/new/%s/%s' % (tkey, domain),
                                   timestamp, timestamp, fname)

    mr_tools.mr_map(process)

def port_cassahides():
    from r2.models import SaveHide, CassandraHide
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.db.operators import desc
    from r2.lib.utils import fetch_things2, timeago, progress

    q = SaveHide._query(SaveHide.c._date > timeago("1 week"),
                        SaveHide.c._name == "hide",
                        sort=desc("_date"))
    q = fetch_things2(q)
    q = progress(q, estimate=1953374)

    for sh in q:
        CassandraHide._hide(sh._thing1, sh._thing2,
                            write_consistency_level=CL.ONE)

def all_comments():
    q = Comment._query(Comment.c._score > 2,
                       Comment.c.sr_id != 6,
                       Comment.c._date > timeago('1 weeks'),
                       sort = desc('_date'),
                       limit = 200,
                       data = True)
    comments = list(q)
    while comments:
        for l in comments:
            yield l
        comments = list(q._after(l))

def append_date_clause(self, table, select, all_time=None):
    """Create the date portion of a where clause based on the time
    period specified."""
    if all_time:
        return select
    if self.period and not self.date:
        select.append_whereclause(table.c.date > timeago(self.period))
    if self.date:
        seconds = 24 * 60 * 60
        wheredate = dt.datetime.strptime(self.date, "%Y%m%d")
        select.append_whereclause(table.c.date >= wheredate)
        select.append_whereclause((table.c.date <
                                   wheredate + dt.timedelta(0, seconds)))
    return select

def vote_stats(config):
    stats = {}

    link_votes = Vote.rel(Account, Link)
    comment_votes = Vote.rel(Account, Comment)

    for name, rel in (('link', link_votes), ('comment', comment_votes)):
        table = get_rel_table(rel._type_id)[0]
        q = table.count(table.c.date > timeago('1 day'))
        stats[name + '_vote_count_past_day'] = q.execute().fetchone()[0]

    stats['vote_count_past_day'] = (stats['link_vote_count_past_day'] +
                                    stats['comment_vote_count_past_day'])

    return stats

def append_date_clause(self, table, select, all_time=None):
    """Create the date portion of a where clause based on the time
    period specified."""
    if all_time:
        return select
    if self.period and not self.date:
        select.append_whereclause(table.c.date > timeago(self.period))
    if self.date:
        seconds = 24 * 60 * 60
        wheredate = dt.datetime.strptime(self.date, "%Y%m%d")
        select.append_whereclause(table.c.date >= wheredate)
        select.append_whereclause(
            (table.c.date < wheredate + dt.timedelta(0, seconds)))
    return select

def reindex_all(types = None, delete_all_first=False):
    """
        Called from `paster run` to totally re-index everything in the
        database. Spawns a thread to connect to Solr, and sends it
        tokenised Things
    """
    global indexed_types

    start_t = datetime.now()

    if not types:
        types = indexed_types

    # We don't want the default thread-local cache (which is just a
    # dict) to grow un-bounded (normally, we'd use
    # utils.set_emptying_cache, except that that preserves memcached,
    # and we don't even want to get memcached for total indexing,
    # because it would dump out more recent stuff)
    g.cache.caches = (SelfEmptyingCache(),) # + g.cache.caches[1:]

    count = 0
    q = Queue(100)
    indexer = Thread(target=indexer_worker, args=(q, delete_all_first))
    indexer.start()

    try:
        for cls in types:
            for batch in fetch_batches(cls, 1000,
                                       timeago("50 years"),
                                       start_t):
                r = tokenize_things([x for x in batch
                                     if not x._spam and not x._deleted])

                count += len(r)
                print ("Processing %s #%d(%s): %s"
                       % (cls.__name__, count, q.qsize(), r[0]['contents']))

                if indexer.isAlive():
                    q.put(r)
                else:
                    raise Exception("'tis a shame that I have but one thread to give")

        q.put("done")
        indexer.join()

    except object, e:
        if indexer.isAlive():
            q.put(e, timeout=30)
        raise e

def time_listings(intervals):
    cutoff_by_interval = {
        interval: epoch_seconds(timeago("1 %s" % interval))
        for interval in intervals
    }

    @mr_tools.dataspec_m_thing(
        ("url", str),
        ("sr_id", int),
        ("author_id", int),
    )
    def process(thing):
        if thing.deleted:
            return

        thing_cls = thingcls_by_name[thing.thing_type]
        fname = make_fullname(thing_cls, thing.thing_id)
        thing_score = score(thing.ups, thing.downs)
        thing_controversy = controversy(thing.ups, thing.downs)

        for interval, cutoff in cutoff_by_interval.iteritems():
            if thing.timestamp < cutoff:
                continue

            yield ("user/%s/top/%s/%d" %
                   (thing.thing_type, interval, thing.author_id),
                   thing_score, thing.timestamp, fname)
            yield ("user/%s/controversial/%s/%d" %
                   (thing.thing_type, interval, thing.author_id),
                   thing_controversy, thing.timestamp, fname)

            if thing.spam:
                continue

            if thing.thing_type == "link":
                yield ("sr/link/top/%s/%d" % (interval, thing.sr_id),
                       thing_score, thing.timestamp, fname)
                yield ("sr/link/controversial/%s/%d" % (interval, thing.sr_id),
                       thing_controversy, thing.timestamp, fname)

                if thing.url:
                    for domain in UrlParser(thing.url).domain_permutations():
                        yield ("domain/link/top/%s/%s" % (interval, domain),
                               thing_score, thing.timestamp, fname)
                        yield ("domain/link/controversial/%s/%s" %
                               (interval, domain),
                               thing_controversy, thing.timestamp, fname)

    mr_tools.mr_map(process)

def time_listings(times = ('all',)):
    oldests = dict((t, epoch_seconds(timeago('1 %s' % t)))
                   for t in times
                   if t != "all")
    oldests['all'] = epoch_seconds(timeago('10 years'))

    @mr_tools.dataspec_m_thing(("url", str),)
    def process(link):
        assert link.thing_type == 'link'

        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        if not link.spam and not link.deleted:
            if link.url:
                domains = UrlParser(link.url).domain_permutations()
            else:
                domains = []
            ups, downs = link.ups, link.downs

            for tkey, oldest in oldests.iteritems():
                if timestamp > oldest:
                    sc = score(ups, downs)
                    contr = controversy(ups, downs)
                    h = _hot(ups, downs, timestamp)

                    for domain in domains:
                        yield ('domain/top/%s/%s' % (tkey, domain),
                               sc, timestamp, fname)
                        yield ('domain/controversial/%s/%s' % (tkey, domain),
                               contr, timestamp, fname)
                        if tkey == "all":
                            yield ('domain/hot/%s/%s' % (tkey, domain),
                                   h, timestamp, fname)
                            yield ('domain/new/%s/%s' % (tkey, domain),
                                   timestamp, timestamp, fname)

    mr_tools.mr_map(process)

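# The domain listings above fan each link out to one key per domain
# permutation of its URL. UrlParser.domain_permutations() is an r2 helper; the
# sketch below only approximates its assumed behavior (trailing sub-domain
# chains of the hostname) and is not the real implementation.
from urlparse import urlparse

def domain_permutations_sketch(url):
    hostname = urlparse(url).hostname or ""
    parts = hostname.split(".")
    # "a.b.example.com" -> ["a.b.example.com", "b.example.com", "example.com"]
    return [".".join(parts[i:]) for i in range(len(parts) - 1)]

# e.g. domain_permutations_sketch("http://www.example.com/x")
#      -> ["www.example.com", "example.com"]
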
def port_cassahides():
    from r2.models import SaveHide, CassandraHide
    from r2.lib.db.tdb_cassandra import CL
    from r2.lib.db.operators import desc
    from r2.lib.utils import fetch_things2, timeago, progress

    q = SaveHide._query(SaveHide.c._date > timeago('1 week'),
                        SaveHide.c._name == 'hide',
                        sort=desc('_date'))
    q = fetch_things2(q)
    q = progress(q, estimate=1953374)

    for sh in q:
        CassandraHide._hide(sh._thing1, sh._thing2,
                            write_consistency_level=CL.ONE)

def delete_old(cls, age="3 days", limit=10000): cutoff = timeago(age) q = cls._query(cls.c._date < cutoff) q._limit = limit accounts = set() defendants = set() for j in q: accounts.add(j._thing1) defendants.add(j._thing2) j._delete() for a in accounts: Jury.by_account(a, _update=True) for d in defendants: Jury.by_defendant(d, _update=True)
def preflight_check(self):
    if getattr(self, 'force_run', False):
        return True

    sr_id = getattr(self, 'batched_time_srid', None)
    if not sr_id:
        return True

    # this is a special query that tries to run less often, see
    # the discussion about batched_time_times
    sr = Subreddit._byID(sr_id, data=True)
    if (self.iden in getattr(sr, 'last_batch_query', {})
        and sr.last_batch_query[self.iden] > utils.timeago('1 day')):
        # this has been done in the last 24 hours, so we should skip it
        return False

    return True

def __init__(self, q, sort, fields = [], subreddits = [], authors = [],
             types = [], timerange = None, spam = False, deleted = False):
    self.q = q
    self.fields = fields
    self.sort = sort
    self.subreddits = subreddits
    self.authors = authors
    self.types = types
    self.spam = spam
    self.deleted = deleted

    if timerange in ['hour','week','day','month','year']:
        self.timerange = (timeago("1 %s" % timerange), "NOW")
    elif timerange == 'all' or timerange is None:
        self.timerange = None
    else:
        self.timerange = timerange

def keep(item):
    """Avoid showing links that are too young, to give time
       for things like the spam filter and thumbnail fetcher to
       act on them before releasing them into the wild"""
    wouldkeep = item.keep_item(item)
    if c.user_is_loggedin and (c.user_is_admin or
                               item.subreddit.is_moderator(c.user)):
        # let admins and moderators see them regardless
        return wouldkeep
    elif wouldkeep and c.user_is_loggedin and c.user._id == item.author_id:
        # also let the author of the link see them
        return True
    elif item._date > timeago(g.new_incubation):
        # it's too young to show yet
        return False
    else:
        # otherwise, fall back to the regular logic (don't
        # show hidden links, etc)
        return wouldkeep

def time_listings(intervals):
    cutoff_by_interval = {interval: epoch_seconds(timeago("1 %s" % interval))
                          for interval in intervals}

    @mr_tools.dataspec_m_thing(
        ("url", str),
        ("sr_id", int),
        ("author_id", int),
    )
    def process(thing):
        if thing.deleted:
            return

        thing_cls = thingcls_by_name[thing.thing_type]
        fname = make_fullname(thing_cls, thing.thing_id)
        thing_score = score(thing.ups, thing.downs)
        thing_controversy = controversy(thing.ups, thing.downs)

        for interval, cutoff in cutoff_by_interval.iteritems():
            if thing.timestamp < cutoff:
                continue

            yield ("user/%s/top/%s/%d" %
                   (thing.thing_type, interval, thing.author_id),
                   thing_score, thing.timestamp, fname)
            yield ("user/%s/controversial/%s/%d" %
                   (thing.thing_type, interval, thing.author_id),
                   thing_controversy, thing.timestamp, fname)

            if thing.spam:
                continue

            if thing.thing_type == "link":
                yield ("sr/link/top/%s/%d" % (interval, thing.sr_id),
                       thing_score, thing.timestamp, fname)
                yield ("sr/link/controversial/%s/%d" % (interval, thing.sr_id),
                       thing_controversy, thing.timestamp, fname)

                if thing.url:
                    for domain in UrlParser(thing.url).domain_permutations():
                        yield ("domain/link/top/%s/%s" % (interval, domain),
                               thing_score, thing.timestamp, fname)
                        yield ("domain/link/controversial/%s/%s" %
                               (interval, domain),
                               thing_controversy, thing.timestamp, fname)

    mr_tools.mr_map(process)

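# Illustration only: the key patterns the mapper above emits for a single
# hypothetical link (the interval, ids, and domain below are made-up values).
def _example_time_listing_keys():
    interval, author_id, sr_id, domain = "day", 1234, 42, "example.com"
    return [
        "user/link/top/%s/%d" % (interval, author_id),   # user/link/top/day/1234
        "sr/link/top/%s/%d" % (interval, sr_id),         # sr/link/top/day/42
        "domain/link/top/%s/%s" % (interval, domain),    # domain/link/top/day/example.com
    ]
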
def delete_old(cls, age="3 days", limit=500, verbose=False): cutoff = timeago(age) q = cls._query(cls.c._date < cutoff) q._limit = limit accounts = set() defendants = set() for j in q: accounts.add(j._thing1) defendants.add(j._thing2) j._delete() for a in accounts: Jury.by_account(a, _update=True) for d in defendants: if verbose: print "Deleting juries for defendant %s" % d._fullname Jury.by_defendant(d, _update=True)
def top_user_change(period = '1 day'):
    rel = Vote.rel(Account, Link)
    rt, account, link, dt = tdb.get_rel_table(rel._type_id)

    author = dt.alias()

    date = utils.timeago(period)

    s = sa.select([author.c.value,
                   sa.func.sum(sa.cast(rt.c.name, sa.Integer))],
                  sa.and_(rt.c.date > date,
                          author.c.thing_id == rt.c.rel_id,
                          author.c.key == 'author_id'),
                  group_by = author.c.value,
                  order_by = sa.desc(sa.func.sum(sa.cast(rt.c.name, sa.Integer))),
                  limit = 10)

    rows = s.execute().fetchall()
    return [(int(r.value), r.sum) for r in rows]

def query(self):
    q = SubscriptionStorage._query(SubscriptionStorage.c._thing1_id == c.user._id,
                                   SubscriptionStorage.c._t2_deleted == False,
                                   SubscriptionStorage.c._name == 'subscriptionstorage',
                                   sort = desc('_t2_interestingness'),
                                   eager_load = True,
                                   thing_data = not g.use_query_cache
                                   )
    if not c.user_is_admin:
        q._filter(SubscriptionStorage.c._t2_spam == False)

    q.prewrap_fn = lambda x: x._thing2

    if self.time == 'last':
        q._filter(SubscriptionStorage.c._date >= last_dashboard_visit())
    elif self.time != 'all':
        q._filter(SubscriptionStorage.c._date >=
                  timeago(queries.relation_db_times[self.time]))

    return q

def query(self):
    q = SubscriptionStorage._query(
        SubscriptionStorage.c._thing1_id == c.user._id,
        SubscriptionStorage.c._t2_deleted == False,
        SubscriptionStorage.c._name == 'subscriptionstorage',
        sort=desc('_t2_interestingness'),
        eager_load=True,
        thing_data=not g.use_query_cache)

    if not c.user_is_admin:
        q._filter(SubscriptionStorage.c._t2_spam == False)

    q.prewrap_fn = lambda x: x._thing2

    if self.time == 'last':
        q._filter(SubscriptionStorage.c._date >= last_dashboard_visit())
    elif self.time != 'all':
        q._filter(SubscriptionStorage.c._date >= timeago(
            queries.relation_db_times[self.time]))

    return q

def user_vote_change_comments(period = '1 day'):
    rel = Vote.rel(Account, Comment)
    type = tdb.rel_types_id[rel._type_id]
    # rt = rel table
    # dt = data table
    rt, account_tt, comment_tt, dt = type.rel_table

    aliases = tdb.alias_generator()
    author_dt = dt.alias(aliases.next())

    date = utils.timeago(period)

    s = sa.select([author_dt.c.value,
                   sa.func.sum(sa.cast(rt.c.name, sa.Integer))],
                  sa.and_(rt.c.date > date,
                          author_dt.c.thing_id == rt.c.rel_id,
                          author_dt.c.key == 'author_id'),
                  group_by = author_dt.c.value)

    rows = s.execute().fetchall()
    return [(int(r.value), r.sum) for r in rows]

def subreddit_stats(config):
    sr_counts = defaultdict(int)

    for kind in (Link, Comment):
        thing_table, data_table = get_thing_table(kind._type_id)
        first_id = list(kind._query(kind.c._date > timeago('1 day'),
                                    sort=asc('_date'),
                                    limit=1))

        if not first_id:
            continue
        else:
            first_id = first_id[0]._id

        q = sa.select([data_table.c.value, sa.func.count(data_table.c.value)],
                      (data_table.c.thing_id > first_id)
                      & (data_table.c.key == 'sr_id')
                      & (thing_table.c.thing_id == data_table.c.thing_id)
                      & (thing_table.c.spam == False),
                      group_by=data_table.c.value)

        for sr_id, count in q.execute():
            sr_counts[sr_id] += count

    return {'subreddits_active_past_day':
            len(list(count for count in sr_counts.itervalues() if count > 5))}

def top_user_change(period="1 day"): rel = Vote.rel(Account, Link) type = tdb.rel_types_id[rel._type_id] # rt = rel table # dt = data table rt, account, link, dt = type.rel_table aliases = tdb.alias_generator() author = dt.alias(aliases.next()) date = utils.timeago(period) s = sa.select( [author.c.value, sa.func.sum(sa.cast(rt.c.name, sa.Integer))], sa.and_(rt.c.date > date, author.c.thing_id == rt.c.rel_id, author.c.key == "author_id"), group_by=author.c.value, order_by=sa.desc(sa.func.sum(sa.cast(rt.c.name, sa.Integer))), limit=10, ) rows = s.execute().fetchall() return [(int(r.value), r.sum) for r in rows]
def catch_up_batch_queries():
    # catch up on batched_time_times queries that should have been run
    # but haven't been. This should be cronned to run about once an
    # hour. The more often it runs, the more the work of rerunning the
    # actual queries is spread out, but every run has a fixed cost of
    # looking at every single subreddit
    sr_q = Subreddit._query(sort=desc('_downs'),
                            data=True)
    dayago = utils.timeago('1 day')
    for sr in fetch_things2(sr_q):
        if hasattr(sr, 'last_valid_vote') and sr.last_valid_vote > dayago:
            # if we don't know when the last vote was, it couldn't
            # have been today
            for sort in batched_time_sorts:
                for time in batched_time_times:
                    q = make_batched_time_query(sr, sort, time)
                    if q.preflight_check():
                        # we haven't run the batched_time_times in the
                        # last day
                        add_queries([q])

    # make sure that all of the jobs have been completed or processed
    # by the time we return
    worker.join()

def catch_up_batch_queries():
    # catch up on batched_time_times queries that should have been run
    # but haven't been. This should be cronned to run about once an
    # hour. The more often it runs, the more the work of rerunning the
    # actual queries is spread out, but every run has a fixed cost of
    # looking at every single subreddit
    sr_q = Subreddit._query(sort=desc("_downs"),
                            data=True)
    dayago = utils.timeago("1 day")
    for sr in fetch_things2(sr_q):
        if hasattr(sr, "last_valid_vote") and sr.last_valid_vote > dayago:
            # if we don't know when the last vote was, it couldn't
            # have been today
            for sort in batched_time_sorts:
                for time in batched_time_times:
                    q = make_batched_time_query(sr, sort, time)
                    if q.preflight_check():
                        # we haven't run the batched_time_times in the
                        # last day
                        add_queries([q])

    # make sure that all of the jobs have been completed or processed
    # by the time we return
    worker.join()

def user_vote_change_links(period='1 day'):
    rel = Vote.rel(Account, Link)
    type = tdb.rel_types_id[rel._type_id]
    # rt = rel table
    # dt = data table
    rt, account_tt, link_tt, dt = type.rel_table

    aliases = tdb.alias_generator()
    author_dt = dt.alias(aliases.next())
    link_dt = tdb.types_id[Link._type_id].data_table[0].alias(aliases.next())

    # Create an SQL CASE statement for the subreddit vote multiplier
    cases = []
    for subreddit in subreddits_with_custom_karma_multiplier():
        cases.append((sa.cast(link_dt.c.value, sa.Integer) == subreddit._id,
                      subreddit.post_karma_multiplier))
    cases.append((True, g.post_karma_multiplier))  # The default article multiplier

    date = utils.timeago(period)

    s = sa.select([
        author_dt.c.value,
        sa.func.sum(sa.cast(rt.c.name, sa.Integer) * sa.case(cases))
    ],
        sa.and_(rt.c.date >= date,
                author_dt.c.thing_id == rt.c.rel_id,
                author_dt.c.key == 'author_id',
                link_tt.c.thing_id == rt.c.thing2_id,
                link_tt.c.date >= date,
                link_dt.c.key == 'sr_id',
                link_dt.c.thing_id == rt.c.thing2_id),
        group_by=author_dt.c.value)

    rows = s.execute().fetchall()
    return [(int(r.value), r.sum) for r in rows]

def short_timesince(date):
    # returns string in the format of '%{number}{unit}'
    # examples: '2d', '45m', '65d'
    if date > timeago("1 minute"):
        return _("just now")

    now = datetime.datetime.now(g.tz)
    diff = int((now - date).total_seconds())

    days = diff / 86400           # 86400 = 24 hours * 60 minutes * 60 seconds per day
    hours = diff % 86400 / 3600   # 3600 = 60 minutes * 60 seconds per hour
    minutes = diff % 3600 / 60    # 60 = 60 seconds per minute
    years = diff / 31536000       # 31536000 = 86400 * 365 days

    if years > 0:
        return "%sy" % years
    if days > 0:
        return "%sd" % days
    if hours > 0:
        return "%sh" % hours
    return "%sm" % minutes

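# Hedged usage illustration for short_timesince() above: the same unit
# arithmetic applied to a raw difference in seconds. The real function
# compares timezone-aware datetimes via g.tz/timeago; this stand-alone helper
# only mirrors the unit selection.
def _short_timesince_sketch(diff_seconds):
    days = diff_seconds // 86400
    hours = diff_seconds % 86400 // 3600
    minutes = diff_seconds % 3600 // 60
    years = diff_seconds // 31536000
    if years > 0:
        return "%sy" % years
    if days > 0:
        return "%sd" % days
    if hours > 0:
        return "%sh" % hours
    return "%sm" % minutes

assert _short_timesince_sketch(45 * 60) == "45m"
assert _short_timesince_sketch(2 * 86400) == "2d"
assert _short_timesince_sketch(400 * 86400) == "1y"
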
def user_vote_change_comments(period='1 day'):
    rel = Vote.rel(Account, Comment)
    type = tdb.rel_types_id[rel._type_id]
    # rt = rel table
    # dt = data table
    rt, account_tt, comment_tt, dt = type.rel_table

    aliases = tdb.alias_generator()
    author_dt = dt.alias(aliases.next())

    date = utils.timeago(period)

    s = sa.select(
        [author_dt.c.value, sa.func.sum(sa.cast(rt.c.name, sa.Integer))],
        sa.and_(rt.c.date >= date,
                author_dt.c.thing_id == rt.c.rel_id,
                author_dt.c.key == 'author_id',
                comment_tt.c.thing_id == rt.c.thing2_id,
                comment_tt.c.date >= date),
        group_by=author_dt.c.value)

    rows = s.execute().fetchall()
    return [(int(r.value), r.sum) for r in rows]

def top_user_change(period='1 day'):
    rel = Vote.rel(Account, Link)
    type = tdb.rel_types_id[rel._type_id]
    # rt = rel table
    # dt = data table
    rt, account, link, dt = type.rel_table

    aliases = tdb.alias_generator()
    author = dt.alias(aliases.next())

    date = utils.timeago(period)

    s = sa.select(
        [author.c.value, sa.func.sum(sa.cast(rt.c.name, sa.Integer))],
        sa.and_(rt.c.date > date,
                author.c.thing_id == rt.c.rel_id,
                author.c.key == 'author_id'),
        group_by=author.c.value,
        order_by=sa.desc(sa.func.sum(sa.cast(rt.c.name, sa.Integer))),
        limit=10)

    rows = s.execute().fetchall()
    return [(int(r.value), r.sum) for r in rows]

def operator(self, time):
    from r2.models import Link

    if time != 'all':
        return Link.c._date >= timeago(time)

def add_props(cls, user, wrapped):
    from r2.lib.count import incr_counts
    from r2.lib.media import thumbnail_url
    from r2.lib.utils import timeago

    saved = Link._saved(user, wrapped) if user else {}
    hidden = Link._hidden(user, wrapped) if user else {}
    clicked = Link._clicked(user, wrapped) if user else {}
    #clicked = {}

    for item in wrapped:
        show_media = False
        if c.user.pref_compress:
            pass
        elif c.user.pref_media == 'on':
            show_media = True
        elif c.user.pref_media == 'subreddit' and item.subreddit.show_media:
            show_media = True
        elif (item.promoted and item.has_thumbnail
              and c.user.pref_media != 'off'):
            show_media = True

        if not show_media:
            item.thumbnail = ""
        elif item.has_thumbnail:
            item.thumbnail = thumbnail_url(item)
        else:
            item.thumbnail = g.default_thumb

        item.domain = (domain(item.url) if not item.is_self
                       else 'self.' + item.subreddit.name)

        if not hasattr(item, 'top_link'):
            item.top_link = False
        item.urlprefix = ''
        item.saved = bool(saved.get((user, item, 'save')))
        item.hidden = bool(hidden.get((user, item, 'hide')))
        item.clicked = clicked.get((user, item, 'click'))
        item.num = None
        item.score_fmt = Score.signed_number
        item.permalink = item.make_permalink(item.subreddit)
        if item.is_self:
            item.url = item.make_permalink(item.subreddit, force_domain=True)

        if c.user_is_admin:
            item.hide_score = False
        elif item.promoted:
            item.hide_score = True
        elif c.user == item.author:
            item.hide_score = False
        elif item._date > timeago("2 hours"):
            item.hide_score = True
        else:
            item.hide_score = False

        # Don't allow users to vote on their own posts and don't
        # allow users to vote on collapsed posts shown when
        # viewing comment permalinks.
        item.votable = bool(c.user != item.author and
                            not getattr(item, 'for_comment_permalink', False))

        if c.user_is_loggedin and item.author._id == c.user._id:
            item.nofollow = False
        elif item.score <= 1 or item._spam or item.author._spam:
            item.nofollow = True
        else:
            item.nofollow = False

        if c.user_is_loggedin and item.subreddit.name == c.user.draft_sr_name:
            item.draft = True
        else:
            item.draft = False

    if c.user_is_loggedin:
        incr_counts(wrapped)

def get_link_counts(period = count_period):
    links = Link._query(Link.c._date >= utils.timeago(period),
                        limit=50, data = True)
    return dict((l._fullname, (0, l.sr_id)) for l in links)

def only_recent(items):
    return filter(lambda l: l._date > utils.timeago('%d day' % g.HOT_PAGE_AGE),
                  items)