Example #1
def get_hot(srs, only_fullnames=False):
    """Get the (fullname, hotness, epoch_seconds) for the hottest
       links in a subreddit. Use the query-cache to avoid some lookups
       if we can."""
    from r2.lib.db.thing import Query
    from r2.lib.db.queries import CachedResults

    ret = []
    queries = [sr.get_links('hot', 'all') for sr in srs]

    # fetch these all in one go
    cachedresults = filter(lambda q: isinstance(q, CachedResults), queries)
    CachedResults.fetch_multi(cachedresults)

    # pair each query with its subreddit instead of relying on the loop
    # variable leaked from the list comprehension above
    for sr, q in zip(srs, queries):
        if isinstance(q, Query):
            links = cached_query(q, sr)
            res = [(link._fullname, link._hot, epoch_seconds(link._date))
                   for link in links]
        elif isinstance(q, CachedResults):
            # we're relying on an implementation detail of
            # CachedResults here, where it's storing tuples that look
            # exactly like the return-type we want, to make our
            # sorting a bit cheaper
            res = list(q.data)

        # remove any that are too old
        age_limit = epoch_seconds(utils.timeago('%d days' % g.HOT_PAGE_AGE))
        res = [(fname if only_fullnames else (fname, hot, date))
               for (fname, hot, date) in res if date > age_limit]
        ret.append(res)

    return ret
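Every snippet on this page calls the same helper. For orientation, here is a minimal sketch of what an epoch_seconds utility computes; reddit's real implementation lives in r2.lib.utils, uses timezone-aware datetimes (g.tz), and is documented to match the equivalent PostgreSQL function, so treat this standalone copy as an approximation:

from datetime import datetime

# naive stand-in for reddit's tz-aware epoch constant
EPOCH = datetime(1970, 1, 1)

def epoch_seconds(date):
    """Return float seconds between the Unix epoch and `date`."""
    td = date - EPOCH
    return td.days * 86400 + td.seconds + td.microseconds / 1e6

print(epoch_seconds(datetime(2012, 6, 1, 12, 0, 0)))  # 1338552000.0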
Example #2
def get_hot(srs, only_fullnames=False):
    """Get the (fullname, hotness, epoch_seconds) for the hottest
       links in a subreddit. Use the query-cache to avoid some lookups
       if we can."""
    from r2.lib.db.thing import Query
    from r2.lib.db.queries import CachedResults

    ret = []
    queries = [sr.get_links("hot", "all") for sr in srs]

    # fetch these all in one go
    cachedresults = filter(lambda q: isinstance(q, CachedResults), queries)
    CachedResults.fetch_multi(cachedresults)

    # pair each query with its subreddit instead of relying on the loop
    # variable leaked from the list comprehension above
    for sr, q in zip(srs, queries):
        if isinstance(q, Query):
            links = cached_query(q, sr)
            res = [(link._fullname, link._hot, epoch_seconds(link._date)) for link in links]
        elif isinstance(q, CachedResults):
            # we're relying on an implementation detail of
            # CachedResults here, where it's storing tuples that look
            # exactly like the return-type we want, to make our
            # sorting a bit cheaper
            res = list(q.data)

        # remove any that are too old
        age_limit = epoch_seconds(utils.timeago("%d days" % g.HOT_PAGE_AGE))
        res = [(fname if only_fullnames else (fname, hot, date)) for (fname, hot, date) in res if date > age_limit]
        ret.append(res)

    return ret
Example #3
    def _run_changed(msgs, chan):
        start = datetime.now(g.tz)

        changed = map(lambda x: strordict_fullname(x.body), msgs)

        boost = set()
        add = set()

        # an item can request that only its boost fields be updated,
        # so we need to separate those out

        for item in changed:
            fname = item["fullname"]
            boost_only = item.get("boost_only", False)

            if fname in add:
                # we're already going to do all of the work
                continue

            if boost_only:
                boost.add(fname)
            else:
                if fname in boost:
                    # we've previously seen an instance of this fname
                    # that requested that only its boosts be updated,
                    # but now we have to update the whole thing
                    boost.remove(fname)

                add.add(fname)

        things = Thing._by_fullname(boost | add, data=True, return_dict=True)

        boost_time = add_time = 0.0
        if boost:
            boost_time = inject([things[fname] for fname in boost], boost_only=True)
        if add:
            add_time = inject([things[fname] for fname in add])

        totaltime = epoch_seconds(datetime.now(g.tz)) - epoch_seconds(start)

        print(
            "%s: %d messages: %d docs (%.2fs), %d boosts (%.2fs) in %.2fs (%d duplicates, %s remaining)"
            % (
                start,
                len(changed),
                len(add),
                add_time,
                len(boost),
                boost_time,
                totaltime,
                len(changed) - len(things),
                msgs[-1].delivery_info.get("message_count", "unknown"),
            )
        )
Example #4
    def _run_changed(msgs, chan):
        start = datetime.now(g.tz)

        changed = map(lambda x: strordict_fullname(x.body), msgs)

        boost = set()
        add = set()

        # an item can request that only its boost fields be updated,
        # so we need to separate those out

        for item in changed:
            fname = item['fullname']
            boost_only = item.get('boost_only', False)

            if fname in add:
                # we're already going to do all of the work
                continue

            if boost_only:
                boost.add(fname)
            else:
                if fname in boost:
                    # we've previously seen an instance of this fname
                    # that requested that only its boosts be updated,
                    # but now we have to update the whole thing
                    boost.remove(fname)

                add.add(fname)

        things = Thing._by_fullname(boost | add, data=True, return_dict=True)

        boost_time = add_time = 0.0
        if boost:
            boost_time = inject([things[fname] for fname in boost],
                                boost_only=True)
        if add:
            add_time = inject([things[fname] for fname in add])

        totaltime = epoch_seconds(datetime.now(g.tz)) - epoch_seconds(start)

        print(
            "%s: %d messages: %d docs (%.2fs), %d boosts (%.2fs) in %.2fs (%d duplicates, %s remaining)"
            % (
                start,
                len(changed),
                len(add),
                add_time,
                len(boost),
                boost_time,
                totaltime,
                len(changed) - len(things),
                msgs[-1].delivery_info.get('message_count', 'unknown'),
            ))
Example #5
File: mr_top.py (project: 3river/reddit)
def time_listings(times = ('year','month','week','day','hour')):
    oldests = dict((t, epoch_seconds(timeago('1 %s' % t)))
                   for t in times)

    @mr_tools.dataspec_m_thing(("url", str),('sr_id', int),)
    def process(link):
        assert link.thing_type == 'link'

        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        if not link.spam and not link.deleted:
            sr_id = link.sr_id
            if link.url:
                domains = UrlParser(link.url).domain_permutations()
            else:
                domains = []
            ups, downs = link.ups, link.downs

            for tkey, oldest in oldests.iteritems():
                if timestamp > oldest:
                    sc = score(ups, downs)
                    contr = controversy(ups, downs)
                    yield ('sr-top-%s-%d' % (tkey, sr_id),
                           sc, timestamp, fname)
                    yield ('sr-controversial-%s-%d' % (tkey, sr_id),
                           contr, timestamp, fname)
                    for domain in domains:
                        yield ('domain/top/%s/%s' % (tkey, domain),
                               sc, timestamp, fname)
                        yield ('domain/controversial/%s/%s' % (tkey, domain),
                               contr, timestamp, fname)

    mr_tools.mr_map(process)
Example #6
    def process(link):
        assert link.thing_type == 'link'

        author_id = link.author_id
        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        yield 'user-submitted-%d' % author_id, timestamp, fname
        if not link.spam:
            sr_id = link.sr_id
            ups, downs = link.ups, link.downs

            yield ('sr-hot-all-%d' % sr_id, _hot(ups, downs, timestamp),
                   timestamp, fname)
            yield 'sr-new-all-%d' % sr_id, timestamp, fname
            yield 'sr-top-all-%d' % sr_id, score(ups, downs), timestamp, fname
            yield ('sr-controversial-all-%d' % sr_id,
                   controversy(ups, downs), timestamp, fname)
            for time in '1 year', '1 month', '1 week', '1 day', '1 hour':
                if timestamp > epoch_seconds(timeago(time)):
                    tkey = time.split(' ')[1]
                    yield ('sr-top-%s-%d' % (tkey, sr_id),
                           score(ups, downs), timestamp, fname)
                    yield ('sr-controversial-%s-%d' % (tkey, sr_id),
                           controversy(ups, downs),
                           timestamp, fname)
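The score, controversy, and _hot helpers these mapreduce jobs call are small pure functions from reddit's open-source sorts module. A self-contained sketch of them follows (the 1134028003 constant is reddit's hot-ranking epoch; this copy is for illustration and may lag the real module):

from math import log10

def score(ups, downs):
    return ups - downs

def controversy(ups, downs):
    # large when a post is heavily voted but nearly tied
    return float(ups + downs) / max(abs(score(ups, downs)), 1)

def _hot(ups, downs, timestamp):
    # votes count logarithmically; recency adds a steady linear boost
    s = score(ups, downs)
    order = log10(max(abs(s), 1))
    sign = 1 if s > 0 else -1 if s < 0 else 0
    seconds = timestamp - 1134028003
    return round(sign * order + seconds / 45000.0, 7)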
Example #7
def normalized_hot(sr_ids, obey_age_limit=True):
    timer = g.stats.get_timer("normalized_hot")
    timer.start()

    if not sr_ids:
        return []

    tuples_by_srid = sgm(g.cache,
                         sr_ids,
                         miss_fn=get_hot_tuples,
                         prefix='normalized_hot',
                         time=g.page_cache_time)

    if obey_age_limit:
        cutoff = datetime.now(g.tz) - timedelta(days=g.HOT_PAGE_AGE)
        oldest = epoch_seconds(cutoff)
    else:
        oldest = 0.

    merged = heapq.merge(*tuples_by_srid.values())
    generator = (link_name for ehot, hot, link_name, timestamp in merged
                 if timestamp > oldest)
    ret = list(itertools.islice(generator, MAX_LINKS))
    timer.stop()
    return ret
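The negation trick used by get_hot_tuples (Examples #35/#36 below) is what makes this merge work: heapq.merge lazily combines iterables that are each sorted ascending, so storing (-effective_hot, -hot, ...) yields the hottest links first. In isolation, with toy data:

import heapq
import itertools

# per-subreddit lists, each already sorted ascending by negated hotness
sr_a = [(-9.5, 'link_a1'), (-3.2, 'link_a2')]
sr_b = [(-7.1, 'link_b1'), (-0.4, 'link_b2')]

merged = heapq.merge(sr_a, sr_b)           # lazy k-way ascending merge
top3 = list(itertools.islice(merged, 3))   # smallest negations = hottest
print([name for neg_hot, name in top3])    # ['link_a1', 'link_b1', 'link_a2']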
Example #8
def time_listings(times = ('year','month','week','day','hour', 'all')):
    oldests = dict((t, epoch_seconds(timeago('1 %s' % t)))
                   for t in times if t != 'all')
    if 'all' in times:
        oldests['all'] = 0

    @mr_tools.dataspec_m_thing(('author_id', int),)
    def process(link):
        assert link.thing_type == 'link'

        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        if not link.spam and not link.deleted:
            author_id = link.author_id
            ups, downs = link.ups, link.downs

            sc = score(ups, downs)
            contr = controversy(ups, downs)
            h = _hot(ups, downs, timestamp)

            for tkey, oldest in oldests.iteritems():
                if timestamp > oldest:
                    yield ('user-top-%s-%d' % (tkey, author_id),
                           sc, timestamp, fname)
                    yield ('user-controversial-%s-%d' % (tkey, author_id),
                           contr, timestamp, fname)
                    if tkey == 'all':
                        yield ('user-new-%s-%d' % (tkey, author_id),
                               timestamp, timestamp, fname)
                        yield ('user-hot-%s-%d' % (tkey, author_id),
                               h, timestamp, fname)


    mr_tools.mr_map(process)
Example #9
    def process(link):
        assert link.thing_type == 'link'

        author_id = link.author_id
        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        yield 'user-submitted-%d' % author_id, timestamp, fname
        if not link.spam:
            sr_id = link.sr_id
            ups, downs = link.ups, link.downs

            yield ('sr-hot-all-%d' % sr_id, _hot(ups, downs,
                                                 timestamp), timestamp, fname)
            yield 'sr-new-all-%d' % sr_id, timestamp, fname
            yield 'sr-top-all-%d' % sr_id, score(ups, downs), timestamp, fname
            yield ('sr-controversial-all-%d' % sr_id, controversy(ups, downs),
                   timestamp, fname)
            for time in '1 year', '1 month', '1 week', '1 day', '1 hour':
                if timestamp > epoch_seconds(timeago(time)):
                    tkey = time.split(' ')[1]
                    yield ('sr-top-%s-%d' % (tkey, sr_id), score(ups, downs),
                           timestamp, fname)
                    yield ('sr-controversial-%s-%d' % (tkey, sr_id),
                           controversy(ups, downs), timestamp, fname)
Example #10
def normalized_hot(sr_ids, obey_age_limit=True, ageweight=None):
    timer = g.stats.get_timer("normalized_hot")
    timer.start()

    if not sr_ids:
        return []

    if ageweight and feature.is_enabled("scaled_normalized_hot"):
        tuples_by_srid = get_hot_tuples(sr_ids, ageweight=ageweight)
    else:
        tuples_by_srid = sgm(g.cache, sr_ids, miss_fn=get_hot_tuples,
                             prefix='normalized_hot', time=g.page_cache_time)

    if obey_age_limit:
        cutoff = datetime.now(g.tz) - timedelta(days=g.HOT_PAGE_AGE)
        oldest = epoch_seconds(cutoff)
    else:
        oldest = 0.

    merged = heapq.merge(*tuples_by_srid.values())
    generator = (link_name for ehot, hot, link_name, timestamp in merged
                           if timestamp > oldest)
    ret = list(itertools.islice(generator, MAX_LINKS))
    timer.stop()
    return ret
Example #11
def time_listings(times = ('year','month','week','day','hour')):
    oldests = dict((t, epoch_seconds(timeago('1 %s' % t)))
                   for t in times)

    @mr_tools.dataspec_m_thing(("url", str),('sr_id', int),)
    def process(link):
        assert link.thing_type == 'link'

        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        if not link.spam and not link.deleted:
            sr_id = link.sr_id
            if link.url:
                domains = UrlParser(link.url).domain_permutations()
            else:
                domains = []
            ups, downs = link.ups, link.downs

            for tkey, oldest in oldests.iteritems():
                if timestamp > oldest:
                    sc = score(ups, downs)
                    contr = controversy(ups, downs)
                    yield ('sr-top-%s-%d' % (tkey, sr_id),
                           sc, timestamp, fname)
                    yield ('sr-controversial-%s-%d' % (tkey, sr_id),
                           contr, timestamp, fname)
                    for domain in domains:
                        yield ('domain/top/%s/%s' % (tkey, domain),
                               sc, timestamp, fname)
                        yield ('domain/controversial/%s/%s' % (tkey, domain),
                               contr, timestamp, fname)

    mr_tools.mr_map(process)
Example #12
def time_listings(times=('year', 'month', 'week', 'day', 'hour', 'all')):
    oldests = dict(
        (t, epoch_seconds(timeago('1 %s' % t))) for t in times if t != 'all')
    if 'all' in times:
        oldests['all'] = 0

    @mr_tools.dataspec_m_thing(
        ('author_id', int), )
    def process(link):
        assert link.thing_type == 'link'

        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        if not link.spam and not link.deleted:
            author_id = link.author_id
            ups, downs = link.ups, link.downs

            sc = score(ups, downs)
            contr = controversy(ups, downs)
            h = _hot(ups, downs, timestamp)

            for tkey, oldest in oldests.iteritems():
                if timestamp > oldest:
                    yield ('user-top-%s-%d' % (tkey, author_id), sc, timestamp,
                           fname)
                    yield ('user-controversial-%s-%d' % (tkey, author_id),
                           contr, timestamp, fname)
                    if tkey == 'all':
                        yield ('user-new-%s-%d' % (tkey, author_id), timestamp,
                               timestamp, fname)
                        yield ('user-hot-%s-%d' % (tkey, author_id), h,
                               timestamp, fname)

    mr_tools.mr_map(process)
Example #13
    def make_period_link(interval, date):
        date = date.replace(tzinfo=g.tz)  # won't be necessary after tz fixup
        if interval == "month":
            if date.month != 12:
                end = date.replace(month=date.month + 1)
            else:
                end = date.replace(month=1, year=date.year + 1)
        else:
            end = date + timedelta_by_name(interval)

        query = urllib.urlencode({
            "syntax": "cloudsearch",
            "restrict_sr": "on",
            "sort": "top",
            "q": "timestamp:{:d}..{:d}".format(int(epoch_seconds(date)),
                                               int(epoch_seconds(end))),
        })
        return "/r/%s/search?%s" % (c.site.name, query)
Example #14
def inject(things, boost_only=False):
    things = [x for x in things if isinstance(x, indextank_indexed_types)]

    update_things = [x for x in things if not x._spam and not x._deleted
                     and x.promoted is None
                     and getattr(x, 'sr_id', None) != -1]
    delete_things = [x for x in things if x._spam or x._deleted]

    if update_things:
        maps = maps_from_things(update_things, boost_only = boost_only)

    indexstart = epoch_seconds(datetime.now(g.tz))
    if update_things:
        inject_maps(maps, boost_only=boost_only)
    if delete_things:
        for thing in delete_things:
            delete_thing(thing)
    return epoch_seconds(datetime.now(g.tz)) - indexstart
Example #15
def inject(things, boost_only=False):
    things = [x for x in things if isinstance(x, indextank_indexed_types)]

    update_things = [x for x in things if not x._spam and not x._deleted
                     and x.promoted is None
                     and getattr(x, 'sr_id', None) != -1]
    delete_things = [x for x in things if x._spam or x._deleted]

    if update_things:
        maps = maps_from_things(update_things, boost_only = boost_only)

    indexstart = epoch_seconds(datetime.now(g.tz))
    if update_things:
        inject_maps(maps, boost_only=boost_only)
    if delete_things:
        for thing in delete_things:
            delete_thing(thing)
    return epoch_seconds(datetime.now(g.tz)) - indexstart
Example #16
def _get_sort_value(comment, sort, link=None, children=None):
    if sort == "_date":
        return epoch_seconds(comment._date)
    if sort == '_qa':
        # Responder is usually the OP, but there could be support for adding
        # other answerers in the future.
        responder_ids = link.responder_ids
        return comment._qa(children, responder_ids)
    return getattr(comment, sort)
Example #17
    def make_period_link(interval, date):
        date = date.replace(tzinfo=g.tz)  # won't be necessary after tz fixup
        if interval == "month":
            if date.month != 12:
                end = date.replace(month=date.month + 1)
            else:
                end = date.replace(month=1, year=date.year + 1)
        else:
            end = date + timedelta_by_name(interval)

        query = urllib.urlencode({
            "syntax": "cloudsearch",
            "restrict_sr": "on",
            "sort": "top",
            "q": "timestamp:{:d}..{:d}".format(int(epoch_seconds(date)),
                                               int(epoch_seconds(end))),
        })
        return "/r/%s/search?%s" % (c.site.name, query)
Example #18
def _get_cutoffs(intervals):
    cutoffs = {}
    for interval in intervals:
        if interval == "all":
            cutoffs["all"] = 0.0
        else:
            cutoffs[interval] = epoch_seconds(timeago("1 %s" % interval))

    return cutoffs
Example #19
def _get_sort_value(comment, sort, link, children=None):
    if sort == "_date":
        return epoch_seconds(comment._date)
    if sort == '_qa':
        # Responder is usually the OP, but there could be support for adding
        # other answerers in the future.
        responder_ids = link.responder_ids
        return comment._qa(children, responder_ids)
    return getattr(comment, sort)
Example #20
File: vote.py (project: kairyan/reddit)
    def create(cls, thing1, thing2s, pgvote, vote_info):
        assert len(thing2s) == 1

        voter = pgvote._thing1
        votee = pgvote._thing2

        rowkey = cls._rowkey(pgvote._date.astimezone(VOTE_TIMEZONE).date())
        colname = (voter._id36, votee._id36)
        details = {"direction": pgvote._name, "date": epoch_seconds(pgvote._date)}
        cls._set_values(rowkey, {colname: json.dumps(details)})
Example #21
def time_listings(times=('all', )):
    oldests = dict(
        (t, epoch_seconds(timeago('1 %s' % t))) for t in times if t != "all")
    oldests['all'] = epoch_seconds(timeago('10 years'))

    @mr_tools.dataspec_m_thing(
        ("url", str), )
    def process(link):
        assert link.thing_type == 'link'

        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        if not link.spam and not link.deleted:
            if link.url:
                domains = UrlParser(link.url).domain_permutations()
            else:
                domains = []
            ups, downs = link.ups, link.downs

            for tkey, oldest in oldests.iteritems():
                if timestamp > oldest:
                    sc = score(ups, downs)
                    contr = controversy(ups, downs)
                    h = _hot(ups, downs, timestamp)
                    num_upvotes = upvotes(ups)  # renamed: rebinding `upvotes` would shadow the helper and raise UnboundLocalError
                    for domain in domains:
                        yield ('domain/top/%s/%s' % (tkey, domain), sc,
                               timestamp, fname)
                        yield ('domain/%s/%s/%s' %
                               (g.voting_upvote_path, tkey, domain),
                               num_upvotes, timestamp, fname)
                        yield ('domain/%s/%s/%s' %
                               (g.voting_controversial_path, tkey, domain),
                               contr, timestamp, fname)
                        if tkey == "all":
                            yield ('domain/hot/%s/%s' % (tkey, domain), h,
                                   timestamp, fname)
                            yield ('domain/new/%s/%s' % (tkey, domain),
                                   timestamp, timestamp, fname)

    mr_tools.mr_map(process)
Example #22
    def create(cls, thing1, thing2s, pgvote, vote_info):
        assert len(thing2s) == 1

        voter = pgvote._thing1
        votee = pgvote._thing2

        rowkey = cls._rowkey(pgvote._date.astimezone(VOTE_TIMEZONE).date())
        colname = (voter._id36, votee._id36)
        details = {
            "direction": pgvote._name,
            "date": epoch_seconds(pgvote._date),
        }
        cls._set_values(rowkey, {colname: json.dumps(details)})
Example #23
def time_listings(intervals):
    cutoff_by_interval = {
        interval: epoch_seconds(timeago("1 %s" % interval))
        for interval in intervals
    }

    @mr_tools.dataspec_m_thing(
        ("url", str),
        ("sr_id", int),
        ("author_id", int),
    )
    def process(thing):
        if thing.deleted:
            return

        thing_cls = thingcls_by_name[thing.thing_type]
        fname = make_fullname(thing_cls, thing.thing_id)
        thing_score = score(thing.ups, thing.downs)
        thing_controversy = controversy(thing.ups, thing.downs)

        for interval, cutoff in cutoff_by_interval.iteritems():
            if thing.timestamp < cutoff:
                continue

            yield ("user/%s/top/%s/%d" %
                   (thing.thing_type, interval, thing.author_id), thing_score,
                   thing.timestamp, fname)
            yield ("user/%s/controversial/%s/%d" %
                   (thing.thing_type, interval, thing.author_id),
                   thing_controversy, thing.timestamp, fname)

            if thing.spam:
                continue

            if thing.thing_type == "link":
                yield ("sr/link/top/%s/%d" % (interval, thing.sr_id),
                       thing_score, thing.timestamp, fname)
                yield ("sr/link/controversial/%s/%d" % (interval, thing.sr_id),
                       thing_controversy, thing.timestamp, fname)

                if thing.url:
                    for domain in UrlParser(thing.url).domain_permutations():
                        yield ("domain/link/top/%s/%s" % (interval, domain),
                               thing_score, thing.timestamp, fname)
                        yield ("domain/link/controversial/%s/%s" %
                               (interval, domain), thing_controversy,
                               thing.timestamp, fname)

    mr_tools.mr_map(process)
Example #24
def time_listings(times = ('all',)):
    oldests = dict((t, epoch_seconds(timeago('1 %s' % t)))
                   for t in times if t != "all")
    oldests['all'] = epoch_seconds(timeago('10 years'))

    @mr_tools.dataspec_m_thing(("url", str),)
    def process(link):
        assert link.thing_type == 'link'

        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        if not link.spam and not link.deleted:
            if link.url:
                domains = UrlParser(link.url).domain_permutations()
            else:
                domains = []
            ups, downs = link.ups, link.downs

            for tkey, oldest in oldests.iteritems():
                if timestamp > oldest:
                    sc = score(ups, downs)
                    contr = controversy(ups, downs)
                    h = _hot(ups, downs, timestamp)
                    for domain in domains:
                        yield ('domain/top/%s/%s' % (tkey, domain),
                               sc, timestamp, fname)
                        yield ('domain/controversial/%s/%s' % (tkey, domain),
                               contr, timestamp, fname)
                        if tkey == "all":
                            yield ('domain/hot/%s/%s' % (tkey, domain),
                                   h, timestamp, fname)
                            yield ('domain/new/%s/%s' % (tkey, domain),
                                   timestamp, timestamp, fname)

    mr_tools.mr_map(process)
Example #25
    def make_item_tuple(self, item):
        """Given a single 'item' from the result of a query build the tuple
        that will be stored in the query cache. It is effectively the
        fullname of the item after passing through the filter plus the
        columns of the unfiltered item to sort by."""
        filtered_item = self.filter(item)
        lst = [filtered_item._fullname]
        for col in self.sort_cols:
            # take the property of the original
            attr = getattr(item, col)
            # convert dates to epochs to take less space
            if isinstance(attr, datetime):
                attr = epoch_seconds(attr)
            lst.append(attr)
        return tuple(lst)
Example #26
File: queries.py (project: rram/reddit)
    def make_item_tuple(self, item):
        """Given a single 'item' from the result of a query build the tuple
        that will be stored in the query cache. It is effectively the
        fullname of the item after passing through the filter plus the
        columns of the unfiltered item to sort by."""
        filtered_item = self.filter(item)
        lst = [filtered_item._fullname]
        for col in self.sort_cols:
            # take the property of the original
            attr = getattr(item, col)
            # convert dates to epochs to take less space
            if isinstance(attr, datetime):
                attr = epoch_seconds(attr)
            lst.append(attr)
        return tuple(lst)
Example #27
File: vote.py (project: Acceto/reddit)
    def create(cls, thing1, thing2s, pgvote, vote_info):
        assert len(thing2s) == 1

        voter = pgvote._thing1
        votee = pgvote._thing2

        details = dict(
            direction=pgvote._name,
            date=epoch_seconds(pgvote._date),
            valid_user=pgvote.valid_user,
            valid_thing=pgvote.valid_thing,
            ip=getattr(pgvote, "ip", ""),
        )
        if vote_info and isinstance(vote_info, basestring):
            details['vote_info'] = vote_info
        cls._set_values(votee._id36, {voter._id36: json.dumps(details)})
Example #28
    def create(cls, thing1, thing2s, pgvote):
        assert len(thing2s) == 1

        voter = pgvote._thing1
        votee = pgvote._thing2

        details = dict(
            direction=pgvote._name,
            date=epoch_seconds(pgvote._date),
            valid_user=pgvote.valid_user,
            valid_thing=pgvote.valid_thing,
            ip=getattr(pgvote, "ip", ""),
            organic=getattr(pgvote, "organic", False),
        )

        cls._set_values(voter._id36, {votee._id36: json.dumps(details)})
Example #29
    def create(cls, thing1, thing2s, pgvote, vote_info):
        assert len(thing2s) == 1

        voter = pgvote._thing1
        votee = pgvote._thing2

        details = dict(
            direction=pgvote._name,
            date=epoch_seconds(pgvote._date),
            valid_user=pgvote.valid_user,
            valid_thing=pgvote.valid_thing,
            ip=getattr(pgvote, "ip", ""),
        )
        if vote_info and isinstance(vote_info, basestring):
            details['vote_info'] = vote_info
        cls._set_values(votee._id36, {voter._id36: json.dumps(details)})
Example #30
File: vote.py (project: Asimov4/dobands)
    def create(cls, thing1, thing2s, pgvote):
        assert len(thing2s) == 1

        voter = pgvote._thing1
        votee = pgvote._thing2

        details = dict(
            direction=pgvote._name,
            date=epoch_seconds(pgvote._date),
            valid_user=pgvote.valid_user,
            valid_thing=pgvote.valid_thing,
            ip=getattr(pgvote, "ip", ""),
            organic=getattr(pgvote, "organic", False),
        )

        cls._set_values(votee._id36, {voter._id36: json.dumps(details)})
Example #31
    def _make_item_tuple(self, item):
        """Return an item tuple from the result of a query.

        The item tuple is used to sort the items in a query without having to
        look them up.

        """
        filtered_item = self.filter(item)
        lst = [filtered_item._fullname]
        for col in self.sort_cols:
            # take the property of the original
            attr = getattr(item, col)
            # convert dates to epochs to take less space
            if isinstance(attr, datetime.datetime):
                attr = epoch_seconds(attr)
            lst.append(attr)
        return tuple(lst)
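The payoff of these tuples is that the query cache can re-sort results without loading any Things: the fullname rides along with its pre-computed sort columns. A toy demonstration with fabricated fullnames and a hotness column:

# hypothetical cached tuples: (fullname, hotness, epoch_seconds)
cached = [
    ('t3_aaa', 4200.0, 1338552000.0),
    ('t3_bbb', 9000.0, 1338555600.0),
    ('t3_ccc', 1300.0, 1338559200.0),
]

# sorting on the stored column reproduces the listing order with no
# database lookups at all
hottest_first = sorted(cached, key=lambda t: t[1], reverse=True)
print([fullname for fullname, hot, ts in hottest_first])
# ['t3_bbb', 't3_aaa', 't3_ccc']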
Example #32
File: mr_top.py (project: Bebetz/reddit)
def time_listings(intervals):
    cutoff_by_interval = {interval: epoch_seconds(timeago("1 %s" % interval))
                          for interval in intervals}

    @mr_tools.dataspec_m_thing(
        ("url", str),
        ("sr_id", int),
        ("author_id", int),
    )
    def process(thing):
        if thing.deleted:
            return

        thing_cls = thingcls_by_name[thing.thing_type]
        fname = make_fullname(thing_cls, thing.thing_id)
        thing_score = score(thing.ups, thing.downs)
        thing_controversy = controversy(thing.ups, thing.downs)

        for interval, cutoff in cutoff_by_interval.iteritems():
            if thing.timestamp < cutoff:
                continue

            yield ("user/%s/top/%s/%d" % (thing.thing_type, interval, thing.author_id),
                   thing_score, thing.timestamp, fname)
            yield ("user/%s/controversial/%s/%d" % (thing.thing_type, interval, thing.author_id),
                   thing_controversy, thing.timestamp, fname)

            if thing.spam:
                continue

            if thing.thing_type == "link":
                yield ("sr/link/top/%s/%d" % (interval, thing.sr_id),
                       thing_score, thing.timestamp, fname)
                yield ("sr/link/controversial/%s/%d" % (interval, thing.sr_id),
                       thing_controversy, thing.timestamp, fname)

                if thing.url:
                    for domain in UrlParser(thing.url).domain_permutations():
                        yield ("domain/link/top/%s/%s" % (interval, domain),
                               thing_score, thing.timestamp, fname)
                        yield ("domain/link/controversial/%s/%s" % (interval, domain),
                               thing_controversy, thing.timestamp, fname)

    mr_tools.mr_map(process)
Example #33
def backfill_vote_details(cls):
    ninety_days = timedelta(days=90).total_seconds()
    for chunk in in_chunks(cls._all(), size=100):
        detail_chunk = defaultdict(dict)
        try:
            with VoterIPByThing._cf.batch(
                    write_consistency_level=cls._write_consistency_level) as b:
                for vote_list in chunk:
                    thing_id36 = vote_list._id
                    thing_fullname = vote_list.votee_fullname
                    details = vote_list.decode_details()
                    for detail in details:
                        voter_id36 = detail["voter_id"]
                        if "ip" in detail and detail["ip"]:
                            ip = detail["ip"]
                            redacted = dict(detail)
                            del redacted["ip"]
                            cast = detail["date"]
                            now = epoch_seconds(
                                datetime.utcnow().replace(tzinfo=g.tz))
                            ttl = ninety_days - (now - cast)
                            oneweek = ""
                            if ttl < 3600 * 24 * 7:
                                oneweek = "(<= one week left)"
                            print "Inserting %s with IP ttl %d %s" % (
                                redacted, ttl, oneweek)
                            detail_chunk[thing_id36][voter_id36] = json.dumps(
                                redacted)
                            if ttl <= 0:
                                print "Skipping bogus ttl for %s: %d" % (
                                    redacted, ttl)
                                continue
                            b.insert(thing_fullname, {voter_id36: ip}, ttl=ttl)
        except Exception:
            # Getting some really weird spurious errors here; complaints about negative
            # TTLs even though they can't possibly be negative, errors from cass
            # that have an explanation of "(why=')"
            # Just going to brute-force this through.  We might lose 100 here and there
            # but mostly it'll be intact.
            pass
        for votee_id36, valuedict in detail_chunk.iteritems():
            cls._set_values(votee_id36, valuedict)
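The TTL arithmetic above keeps a vote's IP for 90 days measured from when the vote was cast, not from when the backfill runs, so late backfills insert short-lived columns. With hypothetical numbers:

from datetime import timedelta

ninety_days = timedelta(days=90).total_seconds()  # 7776000.0

cast = 1338552000.0           # epoch_seconds when the vote happened
now = cast + 86400 * 85       # backfill runs 85 days later
ttl = ninety_days - (now - cast)
print(ttl, ttl < 3600 * 24 * 7)  # 432000.0 True -> "(<= one week left)"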
Example #34
    def process(link):
        assert link.thing_type == "link"

        author_id = link.author_id
        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        yield "user-submitted-%d" % author_id, timestamp, fname
        if not link.spam:
            sr_id = link.sr_id
            ups, downs = link.ups, link.downs

            yield ("sr-hot-all-%d" % sr_id, _hot(ups, downs, timestamp), timestamp, fname)
            yield "sr-new-all-%d" % sr_id, timestamp, fname
            yield "sr-top-all-%d" % sr_id, score(ups, downs), timestamp, fname
            yield ("sr-controversial-all-%d" % sr_id, controversy(ups, downs), timestamp, fname)
            for time in "1 year", "1 month", "1 week", "1 day", "1 hour":
                if timestamp > epoch_seconds(timeago(time)):
                    tkey = time.split(" ")[1]
                    yield ("sr-top-%s-%d" % (tkey, sr_id), score(ups, downs), timestamp, fname)
                    yield ("sr-controversial-%s-%d" % (tkey, sr_id), controversy(ups, downs), timestamp, fname)
Example #35
def get_hot_tuples(sr_ids, ageweight=None):
    queries_by_sr_id = {sr_id: _get_links(sr_id, sort='hot', time='all')
                        for sr_id in sr_ids}
    CachedResults.fetch_multi(queries_by_sr_id.values(), stale=True)
    tuples_by_srid = {sr_id: [] for sr_id in sr_ids}

    now_seconds = epoch_seconds(datetime.now(g.tz))

    for sr_id, q in queries_by_sr_id.iteritems():
        if not q.data:
            continue

        hot_factor = get_hot_factor(q.data[0], now_seconds, ageweight)
        for link_name, hot, timestamp in q.data[:MAX_PER_SUBREDDIT]:
            effective_hot = hot / hot_factor
            # heapq.merge sorts from smallest to largest so we need to flip
            # ehot and hot to get the hottest links first
            tuples_by_srid[sr_id].append(
                (-effective_hot, -hot, link_name, timestamp)
            )

    return tuples_by_srid
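get_hot_factor is not shown on this page. Judging from the call site, it derives a per-subreddit scale from the hottest cached entry (q.data[0]) so that effective_hot is comparable across subreddits of very different sizes. A purely hypothetical reconstruction:

def get_hot_factor(top_entry, now_seconds, ageweight=None):
    """Hypothetical sketch: normalize by the subreddit's hottest link so
    each subreddit's best item lands near effective_hot == 1.0."""
    link_name, hot, timestamp = top_entry
    hot_factor = max(hot, 1.0)  # guard against tiny or zero hotness
    if ageweight:
        # the real ageweight formula is not shown anywhere on this page
        hot_factor *= max((now_seconds - timestamp) * ageweight, 1.0)
    return hot_factor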
Example #36
def get_hot_tuples(sr_ids, ageweight=None):
    queries_by_sr_id = {sr_id: _get_links(sr_id, sort='hot', time='all')
                        for sr_id in sr_ids}
    CachedResults.fetch_multi(queries_by_sr_id.values(), stale=True)
    tuples_by_srid = {sr_id: [] for sr_id in sr_ids}

    now_seconds = epoch_seconds(datetime.now(g.tz))

    for sr_id, q in queries_by_sr_id.iteritems():
        if not q.data:
            continue

        hot_factor = get_hot_factor(q.data[0], now_seconds, ageweight)

        for link_name, hot, timestamp in q.data[:MAX_PER_SUBREDDIT]:
            effective_hot = hot / hot_factor
            # heapq.merge sorts from smallest to largest so we need to flip
            # ehot and hot to get the hottest links first
            tuples_by_srid[sr_id].append(
                (-effective_hot, -hot, link_name, timestamp)
            )

    return tuples_by_srid
Example #37
def backfill_vote_details(cls):
    ninety_days = timedelta(days=90).total_seconds()
    for chunk in in_chunks(cls._all(), size=100):
        detail_chunk = defaultdict(dict)
        try:
            with VoterIPByThing._cf.batch(write_consistency_level=cls._write_consistency_level) as b:
                for vote_list in chunk:
                    thing_id36 = vote_list._id
                    thing_fullname = vote_list.votee_fullname
                    details = vote_list.decode_details()
                    for detail in details:
                        voter_id36 = detail["voter_id"]
                        if "ip" in detail and detail["ip"]:
                            ip = detail["ip"]
                            redacted = dict(detail)
                            del redacted["ip"]
                            cast = detail["date"]
                            now = epoch_seconds(datetime.utcnow().replace(tzinfo=g.tz))
                            ttl = ninety_days - (now - cast)
                            oneweek = ""
                            if ttl < 3600 * 24 * 7:
                                oneweek = "(<= one week left)"
                            print "Inserting %s with IP ttl %d %s" % (redacted, ttl, oneweek)
                            detail_chunk[thing_id36][voter_id36] = json.dumps(redacted)
                            if ttl <= 0:
                                print "Skipping bogus ttl for %s: %d" % (redacted, ttl)
                                continue
                            b.insert(thing_fullname, {voter_id36: ip}, ttl=ttl)
        except Exception:
            # Getting some really weird spurious errors here; complaints about negative
            # TTLs even though they can't possibly be negative, errors from cass
            # that have an explanation of "(why=')"
            # Just going to brute-force this through.  We might lose 100 here and there
            # but mostly it'll be intact.
            pass
        for votee_id36, valuedict in detail_chunk.iteritems():
            cls._set_values(votee_id36, valuedict)
Example #38
def normalized_hot(sr_ids, obey_age_limit=True, ageweight=None):
    timer = g.stats.get_timer("normalized_hot")
    timer.start()

    if not sr_ids:
        return []

    if not feature.is_enabled("scaled_normalized_hot"):
        ageweight = None

    tuples_by_srid = get_hot_tuples(sr_ids, ageweight=ageweight)

    if obey_age_limit:
        cutoff = datetime.now(g.tz) - timedelta(days=g.HOT_PAGE_AGE)
        oldest = epoch_seconds(cutoff)
    else:
        oldest = 0.

    merged = heapq.merge(*tuples_by_srid.values())
    generator = (link_name for ehot, hot, link_name, timestamp in merged
                           if timestamp > oldest)
    ret = list(itertools.islice(generator, MAX_LINKS))
    timer.stop()
    return ret
Example #39
def time_listings(times = ('year','month','week','day','hour')):
    oldests = dict((t, epoch_seconds(timeago('1 %s' % t)))
                   for t in times)

    @mr_tools.dataspec_m_thing(('sr_id', int),)
    def process(link):
        assert link.thing_type == 'link'

        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        if not link.spam:
            sr_id = link.sr_id
            ups, downs = link.ups, link.downs

            for tkey, oldest in oldests.iteritems():
                if timestamp > oldest:
                    yield ('sr-top-%s-%d' % (tkey, sr_id),
                           score(ups, downs), timestamp, fname)
                    yield ('sr-controversial-%s-%d' % (tkey, sr_id),
                           controversy(ups, downs),
                           timestamp, fname)

    mr_tools.mr_map(process)
Example #40
    def _restrict_recent(recent):
        now = datetime.now(g.tz)
        since = epoch_seconds(now - recent)
        return 'timestamp:%i..' % since
Example #41
    def _restrict_recent(recent):
        now = datetime.now(g.tz)
        since = epoch_seconds(now - recent)
        return 'timestamp:[%i TO *]' % since
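Examples #40 and #41 emit the same restriction in two dialects: 'timestamp:%i..' looks like Amazon CloudSearch range syntax, while 'timestamp:[%i TO *]' is Lucene/Solr range syntax. The strings they produce, side by side (hypothetical timestamp):

since = 1338552000

print('timestamp:%i..' % since)       # timestamp:1338552000..
print('timestamp:[%i TO *]' % since)  # timestamp:[1338552000 TO *]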
Example #42
def _get_sort_value(comment, sort):
    if sort == "_date":
        return epoch_seconds(comment._date)
    return getattr(comment, sort)
Example #43
def date_to_adzerk(d):
    utc_date = d - promote.timezone_offset
    epoch_milliseconds = int(epoch_seconds(utc_date) * 1000)
    return '/Date(%s)/' % epoch_milliseconds
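/Date(ms)/ is the legacy ASP.NET JSON date encoding, in milliseconds, which the Adzerk API historically accepted. A standalone check, assuming a zero promote.timezone_offset since that value is not shown here:

from datetime import datetime, timedelta

EPOCH = datetime(1970, 1, 1)

def date_to_adzerk(d, timezone_offset=timedelta(0)):
    utc_date = d - timezone_offset
    ms = int((utc_date - EPOCH).total_seconds() * 1000)
    return '/Date(%s)/' % ms

print(date_to_adzerk(datetime(2012, 6, 1, 12)))  # /Date(1338552000000)/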
Example #44
    def _serialize_date(cls, date):
        return str(epoch_seconds(date))
Example #45
def date_to_adzerk(d):
    utc_date = d - promote.timezone_offset
    epoch_milliseconds = int(epoch_seconds(utc_date) * 1000)
    return '/Date(%s)/' % epoch_milliseconds
Example #46
def _get_sort_value(comment, sort):
    if sort == "_date":
        return epoch_seconds(comment._date)
    return getattr(comment, sort)
Example #47
    def _serialize_date(cls, date):
        return str(epoch_seconds(date))