Example #1
    def gen_keys():
        yield promoted_memo_key

        # just let this one do its own writing
        load_all_reddits()

        yield queries.get_all_comments().iden

        l_q = Link._query(Link.c._spam == (True, False),
                          Link.c._deleted == (True, False),
                          sort=desc('_date'),
                          data=True,
                          )
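        # note: in this query DSL, comparing a column to a tuple means
        # "value in tuple", so _spam == (True, False) matches spam and
        # non-spam links alike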
        for link in fetch_things2(l_q, verbosity):
            yield comments_key(link._id)
            yield last_modified_key(link, 'comments')

        a_q = Account._query(Account.c._spam == (True, False),
                             sort=desc('_date'),
                             )
        for account in fetch_things2(a_q, verbosity):
            yield messages_key(account._id)
            yield last_modified_key(account, 'overview')
            yield last_modified_key(account, 'commented')
            yield last_modified_key(account, 'submitted')
            yield last_modified_key(account, 'liked')
            yield last_modified_key(account, 'disliked')
            yield queries.get_comments(account, 'new', 'all').iden
            yield queries.get_submitted(account, 'new', 'all').iden
            yield queries.get_liked(account).iden
            yield queries.get_disliked(account).iden
            yield queries.get_hidden(account).iden
            yield queries.get_saved(account).iden
            yield queries.get_inbox_messages(account).iden
            yield queries.get_unread_messages(account).iden
            yield queries.get_inbox_comments(account).iden
            yield queries.get_unread_comments(account).iden
            yield queries.get_inbox_selfreply(account).iden
            yield queries.get_unread_selfreply(account).iden
            yield queries.get_sent(account).iden

        sr_q = Subreddit._query(Subreddit.c._spam == (True, False),
                                sort=desc('_date'),
                                )
        for sr in fetch_things2(sr_q, verbosity):
            yield last_modified_key(sr, 'stylesheet_contents')
            yield queries.get_links(sr, 'hot', 'all').iden
            yield queries.get_links(sr, 'new', 'all').iden

            for sort in 'top', 'controversial':
                for time in 'hour', 'day', 'week', 'month', 'year', 'all':
                    yield queries.get_links(sr, sort, time,
                                            merge_batched=False).iden
            yield queries.get_spam_links(sr).iden
            yield queries.get_spam_comments(sr).iden
            yield queries.get_reported_links(sr).iden
            yield queries.get_reported_comments(sr).iden
            yield queries.get_subreddit_messages(sr).iden
            yield queries.get_unread_subreddit_messages(sr).iden
Example #2
    def process_message(msgs, chan):
        """Update get_links(), the Links by Subreddit precomputed query.

        get_links() is a CachedResult which is stored in permacache. To
        update these objects we need to do a read-modify-write which requires
        obtaining a lock. Sharding these updates by subreddit allows us to run
        multiple consumers (but ideally just one per shard) to avoid lock
        contention.

        """

        from r2.lib.db.queries import add_queries, get_links

        link_names = {msg.body for msg in msgs}
        links = Link._by_fullname(link_names, return_dict=False)
        print 'Processing %r' % (links,)

        links_by_sr_id = defaultdict(list)
        for link in links:
            links_by_sr_id[link.sr_id].append(link)

        srs_by_id = Subreddit._byID(links_by_sr_id.keys(), stale=True)

        for sr_id, links in links_by_sr_id.iteritems():
            with g.stats.get_timer("link_vote_processor.subreddit_queries"):
                sr = srs_by_id[sr_id]
                add_queries(
                    queries=[get_links(sr, sort, "all") for sort in SORTS],
                    insert_items=links,
                )
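The docstring above describes sharding these read-modify-write updates by subreddit to avoid lock contention. As a minimal sketch of that routing idea (the shard count and queue-name format below are assumptions, not part of the original code), every link in a given subreddit maps to the same shard, so only that shard's consumer ever takes the lock for that subreddit's cached queries:

    NUM_SHARDS = 10  # assumed constant, not from the original code

    def routing_key(link):
        # e.g. "subreddit_query_q.3"; the queue-name format is assumed
        return "subreddit_query_q.%d" % (link.sr_id % NUM_SHARDS)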
Example #3
def store_keys(key, maxes):
    # we're building queries from queries.py, but we could avoid this
    # by making the queries ourselves if we wanted to avoid the
    # individual lookups for accounts and subreddits
    userrel_fns = dict(liked=queries.get_liked,
                       disliked=queries.get_disliked,
                       saved=queries.get_saved,
                       hidden=queries.get_hidden)
    if key.startswith('user-'):
        acc_str, keytype, account_id = key.split('-')
        account_id = int(account_id)
        fn = queries.get_submitted if keytype == 'submitted' else queries.get_comments
        q = fn(Account._byID(account_id), 'new', 'all')
        insert_to_query(q, [(fname, float(timestamp))
                            for (timestamp, fname) in maxes])
    elif key.startswith('sr-'):
        sr_str, sort, time, sr_id = key.split('-')
        sr_id = int(sr_id)

        if sort == 'controversy':
            # I screwed this up in the mapper and it's too late to fix
            # it
            sort = 'controversial'

        q = queries.get_links(Subreddit._byID(sr_id), sort, time)
        insert_to_query(
            q, [tuple([item[-1]] + map(float, item[:-1])) for item in maxes])

    elif key.split('-')[0] in userrel_fns:
        key_type, account_id = key.split('-')
        account_id = int(account_id)
        fn = userrel_fns[key_type]
        q = fn(Account._byID(account_id))
        insert_to_query(
            q, [tuple([item[-1]] + map(float, item[:-1])) for item in maxes])
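The comprehensions above reshape each item of maxes so the fullname comes first and the remaining fields are coerced to float. A tiny worked example (the field layout of item is an assumption; the reshaping expression is the original one):

    # hypothetical item: numeric fields as strings, fullname last
    item = ('1304038931.0', '25.0', 't3_abc123')
    tuple([item[-1]] + map(float, item[:-1]))
    # => ('t3_abc123', 1304038931.0, 25.0)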
Example #4
    def get_links(self, sort, time, link_cls=None):
        from r2.lib.db import queries
        from r2.models import Link

        if not link_cls:
            link_cls = Link
        return queries.get_links(self, sort, time, link_cls)
Example #5
    def get_links_sr_ids(self, sr_ids, sort, time, link_cls=None):
        from r2.lib.db import queries
        from r2.models import Link

        if not link_cls:
            link_cls = Link

        if not sr_ids:
            srs = []
        else:
            srs = Subreddit._byID(sr_ids, return_dict=False)

        if g.use_query_cache:
            results = []
            for sr in srs:
                results.append(queries.get_links(sr, sort, time))
            return queries.merge_cached_results(*results)
        else:
            q = link_cls._query(link_cls.c.sr_id == sr_ids,
                                sort=queries.db_sort(sort))
            if sort == 'toplinks':
                q._filter(link_cls.c.top_link == True)
            elif sort == 'blessed':
                q._filter(link_cls.c.blessed == True)
            if time != 'all':
                q._filter(queries.db_times[time])
            return q
Example #6
    def get_links_sr_ids(self, sr_ids, sort, time):
        from r2.lib.db import queries

        if not sr_ids:
            return []
        else:
            srs = Subreddit._byID(sr_ids, data=True, return_dict=False)

        results = [queries.get_links(sr, sort, time) for sr in srs]
        return queries.merge_results(*results)
Example #7
    def test_get_links(self):
        from r2.lib.db import queries
        from r2.models import Subreddit, Account, Link, Thing

        account = Account._byID(1, data=True)
        sr = Subreddit._by_name("reddit_test0")
        link_url = self.make_unique_url()

        new_link = Link._submit("test_get_links", link_url, account, sr, "127.0.0.1", kind="link")
        queries.new_link(new_link, foreground=True)

        res = Thing._by_fullname(queries.get_links(sr, "new", "all"), return_dict=False)
        self.assert_true(len(res) > 0, "no links returned")
        self.assert_equal(new_link._id, res[0]._id)
Example #8
def store_keys(key, maxes):
    # we're building queries using queries.py, but we could make the
    # queries ourselves if we wanted to avoid the individual lookups
    # for accounts and subreddits.

    # Note that we're only generating the 'sr-' type queries here, but
    # we're also able to process the other listings generated by the
    # old migrate.mr_permacache for convenience

    userrel_fns = dict(liked = queries.get_liked,
                       disliked = queries.get_disliked,
                       saved = queries.get_saved,
                       hidden = queries.get_hidden)

    if key.startswith('user-'):
        acc_str, keytype, account_id = key.split('-')
        account_id = int(account_id)
        fn = queries.get_submitted if keytype == 'submitted' else queries.get_comments
        q = fn(Account._byID(account_id), 'new', 'all')
        q._insert_tuples([(fname, float(timestamp))
                    for (timestamp, fname)
                    in maxes])

    elif key.startswith('sr-'):
        sr_str, sort, time, sr_id = key.split('-')
        sr_id = int(sr_id)

        if sort == 'controversy':
            # I screwed this up in the mapper and it's too late to fix
            # it
            sort = 'controversial'

        q = queries.get_links(Subreddit._byID(sr_id), sort, time)
        q._insert_tuples([tuple([item[-1]] + map(float, item[:-1]))
                    for item in maxes])
    elif key.startswith('domain/'):
        d_str, sort, time, domain = key.split('/')
        q = queries.get_domain_links(domain, sort, time)
        q._insert_tuples([tuple([item[-1]] + map(float, item[:-1]))
                    for item in maxes])

    elif key.split('-')[0] in userrel_fns:
        key_type, account_id = key.split('-')
        account_id = int(account_id)
        fn = userrel_fns[key_type]
        q = fn(Account._byID(account_id))
        q._insert_tuples([tuple([item[-1]] + map(float, item[:-1]))
                    for item in maxes])
Example #9
def get_recent_name_submissions():
    link_fullnames = list(queries.get_links(SERVERNAME_SR, "new", "all"))
    links = chain.from_iterable(Thing._by_fullname(chunk, return_dict=False)
                                for chunk in in_chunks(link_fullnames))

    for link in links:
        if link._deleted or link._spam:
            continue

        # OH GOD WHAT HAVE YOU POSTED IN MY LOVELY AUTOMATED SUBREDDIT!?
        if (not hasattr(link, "revenue_date") or
            not hasattr(link, "revenue_bucket") or
            not hasattr(link, "server_names")):
            continue

        yield link
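A brief usage sketch for the generator above, assuming the application context (g, SERVERNAME_SR, and the models) is already initialized; the attributes printed are the ones the hasattr() guard requires:

    for link in get_recent_name_submissions():
        print link._fullname, link.revenue_date, link.revenue_bucket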
Example #10
    def get_links_sr_ids(self, sr_ids, sort, time):
        from r2.lib.db import queries
        from r2.models import Link

        if not sr_ids:
            return []
        else:
            srs = Subreddit._byID(sr_ids, data=True, return_dict=False)

        if g.use_query_cache:
            results = [queries.get_links(sr, sort, time) for sr in srs]
            return queries.merge_results(*results)
        else:
            q = Link._query(Link.c.sr_id == sr_ids, sort=queries.db_sort(sort), data=True)
            if time != "all":
                q._filter(queries.db_times[time])
            return q
Example #11
    def get_links_sr_ids(self, sr_ids, sort, time):
        from r2.lib.db import queries
        from r2.models import Link

        if not sr_ids:
            return []
        else:
            srs = Subreddit._byID(sr_ids, return_dict=False)

        if g.use_query_cache:
            results = [queries.get_links(sr, sort, time) for sr in srs]
            return queries.merge_results(*results)
        else:
            q = Link._query(Link.c.sr_id == sr_ids, sort=queries.db_sort(sort))
            if time != 'all':
                q._filter(queries.db_times[time])
            return q
Example #12
    def test_get_files(self):
        from r2.lib.db import queries
        from r2.models import Subreddit, Account, Link, Thing

        account = Account._byID(1, data=True)
        sr = Subreddit._by_name("reddit_test0")
        link_url = self.make_unique_url()

        new_link = Link._submit("test_get_files", link_url, account, sr, "127.0.0.1", kind="file")
        queries.new_link(new_link, foreground=True)

        # make sure it returns like a normal link
        res = Thing._by_fullname(queries.get_links(sr, "new", "all"), return_dict=False)
        self.assert_true(len(res) > 0, "no links returned")
        self.assert_equal(new_link._id, res[0]._id)

        # the link should also come back from the kind='file' filtered query
        res = list(queries.get_files(sr))
        self.assert_true(len(res) > 0, "no links returned")
        self.assert_equal(new_link._id, res[0]._id)
Example #13
    def get_links_sr_ids(self, sr_ids, sort, time):
        from r2.lib.db import queries
        from r2.models import Link
        from r2.lib.normalized_hot import expand_children

        if not sr_ids:
            return []
        else:
            srs = Subsciteit._byID(sr_ids, data=True, return_dict=False)

        if g.use_query_cache:
            srs = expand_children(srs)
            results = [queries.get_links(sr, sort, time, no_children=True)
                       for sr in srs]
            return queries.merge_results(*results)
        else:
            sr_ids = expand_children(sr_ids, byID=True)
            q = Link._query(Link.c.sr_id == sr_ids,
                            sort=queries.db_sort(sort), data=True)
            if time != 'all':
                q._filter(queries.db_times[time])
            return q
Example #14
    def get_links(self, sort, time):
        from r2.lib.db import queries

        return queries.get_links(self, sort, time)