Example #1
    def pre(self):

        c.start_time = datetime.now(g.tz)
        g.reset_caches()

        c.domain_prefix = request.environ.get("reddit-domain-prefix",
                                              g.domain_prefix)
        c.secure = request.host in g.secure_domains

        #check if user-agent needs a dose of rate-limiting
        if not c.error_page:
            ratelimit_throttled()
            ratelimit_agents()

        c.allow_loggedin_cache = False

        c.show_wiki_actions = False

        # the domain has to be set before Cookies get initialized
        set_subreddit()
        c.errors = ErrorSet()
        c.cookies = Cookies()
        # if an rss feed, this will also log the user in if a feed=
        # GET param is included
        set_content_type()
Example #2
    def pre(self):
        action = request.environ["pylons.routes_dict"].get("action")
        if action:
            c.request_timer = g.stats.get_timer(request_timer_name(action))
        else:
            c.request_timer = SimpleSillyStub()

        c.response_wrappers = []
        c.start_time = datetime.now(g.tz)
        c.request_timer.start()
        g.reset_caches()

        c.domain_prefix = request.environ.get("reddit-domain-prefix",
                                              g.domain_prefix)
        c.secure = request.host in g.secure_domains

        #check if user-agent needs a dose of rate-limiting
        if not c.error_page:
            ratelimit_throttled()
            ratelimit_agents()

        c.allow_loggedin_cache = False

        c.show_wiki_actions = False

        # the domain has to be set before Cookies get initialized
        set_subreddit()
        c.errors = ErrorSet()
        c.cookies = Cookies()
        # if an rss feed, this will also log the user in if a feed=
        # GET param is included
        set_content_type()
        c.request_timer.intermediate("minimal-pre")
Example #3
def handle_items(queue, callback, ack = True, limit = 1, drain = False,
                 verbose=True, sleep_time = 1):
    """Call callback() on every item in a particular queue. If the
       connection to the queue is lost, it will die. Intended to be
       used as a long-running process."""
    from pylons import c

    chan = connection_manager.get_channel()
    countdown = None

    while True:

        # NB: None != 0, so we don't need an "is not None" check here
        if countdown == 0:
            break

        msg = chan.basic_get(queue)
        if not msg and drain:
            return
        elif not msg:
            time.sleep(sleep_time)
            continue

        if countdown is None and drain and 'message_count' in msg.delivery_info:
            countdown = 1 + msg.delivery_info['message_count']

        g.reset_caches()
        c.use_write_db = {}

        items = []

        while msg and countdown != 0:
            items.append(msg)
            if countdown is not None:
                countdown -= 1
            if len(items) >= limit:
                break # the innermost loop only
            msg = chan.basic_get(queue)

        try:
            count_str = ''
            if 'message_count' in items[-1].delivery_info:
                # the count from the last message, if the count is
                # available
                count_str = '(%d remaining)' % items[-1].delivery_info['message_count']
            if verbose:
                print "%s: %d items %s" % (queue, len(items), count_str)
            callback(items, chan)

            if ack:
                # ack *all* outstanding messages
                chan.basic_ack(0, multiple=True)

            # flush any log messages printed by the callback
            sys.stdout.flush()
        except:
            for item in items:
                # explicitly reject the items that we've not processed
                chan.basic_reject(item.delivery_tag, requeue = True)
            raise
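
A minimal usage sketch for the consumer above. The queue name and the
per-message work are hypothetical; msg.body is the raw payload on the
py-amqplib message objects these snippets deal in:

def process_batch(msgs, chan):
    # msgs is the list of up to `limit` messages batched above
    for msg in msgs:
        print "processing %r" % msg.body

handle_items("hypothetical_q", process_batch, ack=True, limit=10)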
Example #4
    def pre(self):
        action = request.environ["pylons.routes_dict"].get("action")
        if action:
            c.request_timer = g.stats.get_timer(request_timer_name(action))
        else:
            c.request_timer = SimpleSillyStub()

        c.response_wrapper = None
        c.start_time = datetime.now(g.tz)
        c.request_timer.start()
        g.reset_caches()

        c.domain_prefix = request.environ.get("reddit-domain-prefix",
                                              g.domain_prefix)
        c.secure = request.host in g.secure_domains

        #check if user-agent needs a dose of rate-limiting
        if not c.error_page:
            ratelimit_throttled()
            ratelimit_agents()

        c.allow_loggedin_cache = False

        # the domain has to be set before Cookies get initialized
        set_subreddit()
        c.errors = ErrorSet()
        c.cookies = Cookies()
        # if an rss feed, this will also log the user in if a feed=
        # GET param is included
        set_content_type()
        c.request_timer.intermediate("minimal-pre")
Example #5
    def pre(self):
        action = request.environ["pylons.routes_dict"].get("action")
        if action:
            if not self._get_action_handler():
                action = 'invalid'
            controller = request.environ["pylons.routes_dict"]["controller"]
            key = "{}.{}".format(controller, action)
            c.request_timer = g.stats.get_timer(request_timer_name(key))
        else:
            c.request_timer = SimpleSillyStub()

        c.response_wrapper = None
        c.start_time = datetime.now(g.tz)
        c.request_timer.start()
        g.reset_caches()

        c.domain_prefix = request.environ.get("reddit-domain-prefix",
                                              g.domain_prefix)
        c.secure = request.environ["wsgi.url_scheme"] == "https"
        c.request_origin = request.host_url

        #check if user-agent needs a dose of rate-limiting
        if not c.error_page:
            ratelimit_throttled()
            ratelimit_agents()

        c.allow_loggedin_cache = False
        c.allow_framing = False

        c.cdn_cacheable = (request.via_cdn and
                           g.login_cookie not in request.cookies)

        # the domain has to be set before Cookies get initialized
        set_subreddit()
        c.errors = ErrorSet()
        c.cookies = Cookies()
        # if an rss feed, this will also log the user in if a feed=
        # GET param is included
        set_content_type()

        c.request_timer.intermediate("minimal-pre")
        # True/False forces. None updates for most non-POST requests
        c.update_last_visit = None

        g.stats.count_string('user_agents', request.user_agent)

        if not self.defer_ratelimiting:
            self.run_sitewide_ratelimits()
            c.request_timer.intermediate("minimal-ratelimits")

        hooks.get_hook("reddit.request.minimal_begin").call()
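
For context on the last line: handlers attach to these named hooks
elsewhere in the codebase. A hedged sketch of a listener, assuming a
HookRegistrar-style registration API (an assumption; the interface is
not shown on this page):

from r2.lib.hooks import HookRegistrar

hooks = HookRegistrar()

@hooks.on("reddit.request.minimal_begin")
def on_request_begin():
    # hypothetical listener body
    g.log.debug("request began at %s", c.start_time)

hooks.register_all()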
Example #6
    def pre(self):
        action = request.environ["pylons.routes_dict"].get("action")
        if action:
            if not self._get_action_handler():
                action = 'invalid'
            c.request_timer = g.stats.get_timer(request_timer_name(action))
        else:
            c.request_timer = SimpleSillyStub()

        c.response_wrapper = None
        c.start_time = datetime.now(g.tz)
        c.request_timer.start()
        g.reset_caches()

        c.domain_prefix = request.environ.get("reddit-domain-prefix",
                                              g.domain_prefix)
        c.secure = request.host in g.secure_domains

        # wsgi.url_scheme is used in generating absolute urls, such as by webob
        # for translating some of our relative-url redirects to rfc compliant
        # absolute-url ones. TODO: consider using one of webob's methods of
        # setting wsgi.url_scheme based on incoming request headers added by
        # upstream things like stunnel/haproxy.
        if c.secure:
            request.environ["wsgi.url_scheme"] = "https"

        c.request_origin = request.host_url

        #check if user-agent needs a dose of rate-limiting
        if not c.error_page:
            ratelimit_throttled()
            ratelimit_agents()

        c.allow_loggedin_cache = False

        # the domain has to be set before Cookies get initialized
        set_subreddit()
        c.errors = ErrorSet()
        c.cookies = Cookies()
        # if an rss feed, this will also log the user in if a feed=
        # GET param is included
        set_content_type()

        c.request_timer.intermediate("minimal-pre")
        # True/False forces. None updates for most non-POST requests
        c.update_last_visit = None

        g.stats.count_string('user_agents', request.user_agent)

        hooks.get_hook("reddit.request.minimal_begin").call()
Example #7
    def pre(self):
        action = request.environ["pylons.routes_dict"].get("action")
        if action:
            if not self._get_action_handler():
                action = 'invalid'
            c.request_timer = g.stats.get_timer(request_timer_name(action))
        else:
            c.request_timer = SimpleSillyStub()

        c.response_wrapper = None
        c.start_time = datetime.now(g.tz)
        c.request_timer.start()
        g.reset_caches()

        c.domain_prefix = request.environ.get("reddit-domain-prefix",
                                              g.domain_prefix)
        c.secure = request.environ["wsgi.url_scheme"] == "https"
        c.request_origin = request.host_url

        #check if user-agent needs a dose of rate-limiting
        if not c.error_page:
            ratelimit_throttled()
            ratelimit_agents()

        c.allow_loggedin_cache = False
        c.allow_framing = False

        c.cdn_cacheable = (request.via_cdn
                           and g.login_cookie not in request.cookies)

        # the domain has to be set before Cookies get initialized
        set_subreddit()
        c.errors = ErrorSet()
        c.cookies = Cookies()
        # if an rss feed, this will also log the user in if a feed=
        # GET param is included
        set_content_type()

        c.request_timer.intermediate("minimal-pre")
        # True/False forces. None updates for most non-POST requests
        c.update_last_visit = None

        g.stats.count_string('user_agents', request.user_agent)

        if not self.defer_ratelimiting:
            self.run_sitewide_ratelimits()
            c.request_timer.intermediate("minimal-ratelimits")

        hooks.get_hook("reddit.request.minimal_begin").call()
Example #8
    def _callback(msg):
        if verbose:
            count_str = ''
            if 'message_count' in msg.delivery_info:
                # the count from the last message, if the count is
                # available
                count_str = '(%d remaining)' % msg.delivery_info['message_count']

            print "%s: 1 item %s" % (queue, count_str)

        g.reset_caches()
        ret = callback(msg)
        msg.channel.basic_ack(msg.delivery_tag)
        sys.stdout.flush()
        return ret
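
This _callback closes over queue, callback, and verbose from an
enclosing consumer function. A sketch of how it might be driven,
assuming py-amqplib's push-mode API (basic_consume/wait), the
counterpart of the basic_get polling loops elsewhere on this page:

chan.basic_consume(queue, callback=_callback)
while True:
    chan.wait()  # blocks until a delivery arrives, then runs _callback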
Example #9
def get_step_state(emr_connection, jobflowid, step_name, update=False):
    """Return the state of a step.

    If the jobflowid/step_name combination is not unique, this will return the state
    of the most recent step.

    """

    g.reset_caches()
    steps = get_step_states(emr_connection, jobflowid, _update=update)

    for name, state in reversed(steps):
        if name == step_name:
            return state
    else:
        return NOTFOUND
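
A usage sketch with placeholder identifiers. get_step_states() is
assumed to return (step_name, state) pairs in submission order, which
is why the reversed() scan finds the most recent run of a step first:

# emr_connection: a boto EMR connection, as in the related helpers here
state = get_step_state(emr_connection, "j-1ABCD2EFGHIJ", "hourly_traffic")
if state == NOTFOUND:
    print "step has not run yet"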
Example #10
    def _callback(msg):
        if verbose:
            count_str = ""
            if "message_count" in msg.delivery_info:
                # the count from the last message, if the count is
                # available
                count_str = "(%d remaining)" % msg.delivery_info["message_count"]

            print "%s: 1 item %s" % (queue, count_str)

        g.reset_caches()
        c.use_write_db = {}

        ret = callback(msg)
        msg.channel.basic_ack(msg.delivery_tag)
        sys.stdout.flush()
        return ret
Example #11
    def pre(self):
        c.start_time = datetime.now(g.tz)
        g.reset_caches()

        c.domain_prefix = request.environ.get("reddit-domain-prefix", 
                                              g.domain_prefix)
        #check if user-agent needs a dose of rate-limiting
        if not c.error_page:
            ratelimit_agents()
            ratelimit_throttled()

        c.allow_loggedin_cache = False

        # the domain has to be set before Cookies get initialized
        set_subreddit()
        c.errors = ErrorSet()
        c.cookies = Cookies()
Example #12
def main():
    now = datetime.datetime.now(TIMEZONE)

    # calculate and store the new day's gold goal
    determine_gold_goal(now.date())

    # post a new thread if we met our revenue goal
    yesterday = (now - datetime.timedelta(days=1)).date()
    post_if_goal_reached(yesterday)

    # look at old (now complete) threads if any
    activate_requested_names(but_not=yesterday)

    # wait until all our amqp / permacache changes are flushed from the
    # in-process queue.
    worker.join()
    g.reset_caches()

    # update the sidebar with a list of names
    update_sidebar()
Example #13
    def post(self):
        c.request_timer.intermediate("action")

        if c.response_wrapper:
            content = "".join(_force_utf8(x)
                              for x in tup(response.content) if x)
            wrapped_content = c.response_wrapper(content)
            response.content = wrapped_content

        if c.user_is_loggedin and not c.allow_loggedin_cache:
            response.headers['Cache-Control'] = 'no-cache'
            response.headers['Pragma'] = 'no-cache'

        if c.deny_frames:
            response.headers["X-Frame-Options"] = "DENY"

        #set content cache
        if (g.page_cache_time
            and request.method.upper() == 'GET'
            and (not c.user_is_loggedin or c.allow_loggedin_cache)
            and not c.used_cache
            and response.status_code not in (429, 503)):
            try:
                g.pagecache.set(self.request_key(),
                                (response._current_obj(), c.cookies),
                                g.page_cache_time)
            except MemcachedError as e:
                # this codepath will actually never be hit as long as
                # the pagecache memcached client is in no_reply mode.
                g.log.warning("Ignored exception (%r) on pagecache "
                              "write for %r", e, request.path)

        # send cookies
        for k, v in c.cookies.iteritems():
            if v.dirty:
                response.set_cookie(key=k,
                                    value=quote(v.value),
                                    domain=v.domain,
                                    expires=v.expires,
                                    secure=getattr(v, 'secure', False),
                                    httponly=getattr(v, 'httponly', False))

        end_time = datetime.now(g.tz)

        # update last_visit
        if (c.user_is_loggedin and not g.disallow_db_writes and
            request.method.upper() != "POST" and
            not c.dont_update_last_visit and
            request.path != '/validuser'):
            c.user.update_last_visit(c.start_time)

        check_request(end_time)

        # this thread is probably going to be reused, but it could be
        # a while before it is. So we might as well dump the cache in
        # the mean time so that we don't have dead objects hanging
        # around taking up memory
        g.reset_caches()

        # push data to statsd
        c.request_timer.stop()
        g.stats.flush()
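
The c.response_wrapper consumed at the top of this post() hook is
installed by individual actions earlier in the request. A hypothetical
illustration (the JSONP-style wrapping and callback name are
assumptions):

def jsonp_wrapper(content):
    return "callbackFn(%s)" % content  # callbackFn is a placeholder

c.response_wrapper = jsonp_wrapper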
Example #14
    def post(self):
        response = c.response
        content = filter(None, response.content)
        if isinstance(content, (list, tuple)):
            content = ''.join(content)
        for w in c.response_wrappers:
            content = w(content)
        response.content = content
        if c.response_content_type:
            response.headers['Content-Type'] = c.response_content_type

        if c.user_is_loggedin and not c.allow_loggedin_cache:
            response.headers['Cache-Control'] = 'no-cache'
            response.headers['Pragma'] = 'no-cache'

        if c.deny_frames:
            response.headers["X-Frame-Options"] = "DENY"

        #return
        #set content cache
        if (g.page_cache_time and request.method.upper() == 'GET'
                and (not c.user_is_loggedin or c.allow_loggedin_cache)
                and not c.used_cache and response.status_code not in (429, 503)
                and response.content and response.content[0]):
            try:
                g.rendercache.set(self.request_key(), (response, c.cookies),
                                  g.page_cache_time)
            except MemcachedError:
                # the key was too big to set in the rendercache
                g.log.debug("Ignored too-big render cache")

        # send cookies
        for k, v in c.cookies.iteritems():
            if v.dirty:
                response.set_cookie(key=k,
                                    value=quote(v.value),
                                    domain=v.domain,
                                    expires=v.expires)

        end_time = datetime.now(g.tz)

        if ('pylons.routes_dict' in request.environ
                and 'action' in request.environ['pylons.routes_dict']):
            action = str(request.environ['pylons.routes_dict']['action'])
        else:
            action = "unknown"
            log_text("unknown action", "no action for %r" % path_info,
                     "warning")
        if g.usage_sampling >= 1.0 or rand.random() < g.usage_sampling:

            amqp.add_kw("usage_q",
                        start_time=c.start_time,
                        end_time=end_time,
                        sampling_rate=g.usage_sampling,
                        action=action)

        check_request(end_time)

        # this thread is probably going to be reused, but it could be
        # a while before it is. So we might as well dump the cache in
        # the mean time so that we don't have dead objects hanging
        # around taking up memory
        g.reset_caches()

        # push data to statsd
        if 'pylons.action_method' in request.environ:
            # only report web timing data if an action handler was called
            g.stats.transact('web.%s' % action,
                             (end_time - c.start_time).total_seconds())
        g.stats.flush_timing_stats()
Example #15
    def post(self):
        c.request_timer.intermediate("action")

        # if the action raised an HTTPException (i.e. it aborted) then pylons
        # will have replaced response with the exception itself.
        c.is_exception_response = getattr(response, "_exception", False)

        if c.response_wrapper and not c.is_exception_response:
            content = flatten_response(response.content)
            wrapped_content = c.response_wrapper(content)
            response.content = wrapped_content

        if c.user_is_loggedin and not c.allow_loggedin_cache:
            response.headers['Cache-Control'] = 'no-cache'
            response.headers['Pragma'] = 'no-cache'

        if c.deny_frames:
            response.headers["X-Frame-Options"] = "DENY"

        # save the result of this page to the pagecache if possible.  we
        # mustn't cache things that rely on state not tracked by request_key
        # such as If-Modified-Since headers for 304s or requesting IP for 429s.
        if (g.page_cache_time and request.method.upper() == 'GET'
                and c.can_use_pagecache and not c.used_cache
                and response.status_int not in (304, 429)
                and not response.status.startswith("5")
                and not c.is_exception_response):
            try:
                g.pagecache.set(self.request_key(),
                                (response._current_obj(), c.cookies),
                                g.page_cache_time)
            except MemcachedError as e:
                # this codepath will actually never be hit as long as
                # the pagecache memcached client is in no_reply mode.
                g.log.warning(
                    "Ignored exception (%r) on pagecache "
                    "write for %r", e, request.path)

        pragmas = [
            p.strip() for p in request.headers.get("Pragma", "").split(",")
        ]
        if g.debug or "x-reddit-pagecache" in pragmas:
            if c.can_use_pagecache:
                pagecache_state = "hit" if c.used_cache else "miss"
            else:
                pagecache_state = "disallowed"
            response.headers["X-Reddit-Pagecache"] = pagecache_state

        # send cookies
        for k, v in c.cookies.iteritems():
            if v.dirty:
                response.set_cookie(key=k,
                                    value=quote(v.value),
                                    domain=v.domain,
                                    expires=v.expires,
                                    secure=getattr(v, 'secure', False),
                                    httponly=getattr(v, 'httponly', False))

        if self.should_update_last_visit():
            c.user.update_last_visit(c.start_time)

        hooks.get_hook("reddit.request.end").call()

        # this thread is probably going to be reused, but it could be
        # a while before it is. So we might as well dump the cache in
        # the mean time so that we don't have dead objects hanging
        # around taking up memory
        g.reset_caches()

        c.request_timer.intermediate("post")

        # push data to statsd
        c.request_timer.stop()
        g.stats.flush()
Example #16
def describe_jobflows_by_ids(emr_connection, jobflow_ids, _update=False):
    g.reset_caches()
    jobflows = describe_jobflows_cached(emr_connection, _update=_update)
    return [jf for jf in jobflows if jf.jobflowid in jobflow_ids]
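
A hedged wiring sketch: these helpers predate boto3, so a boto 2 style
EMR connection is assumed, and the region and jobflow ids are
placeholders:

import boto.emr

conn = boto.emr.connect_to_region("us-east-1")
for jf in describe_jobflows_by_ids(conn, ["j-1ABCD2EFGHIJ"]):
    print "%s: %s" % (jf.jobflowid, jf.state)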
Example #17
    def post(self):
        response = c.response
        content = filter(None, response.content)
        if isinstance(content, (list, tuple)):
            content = "".join(content)
        for w in c.response_wrappers:
            content = w(content)
        response.content = content
        if c.response_content_type:
            response.headers["Content-Type"] = c.response_content_type

        if c.user_is_loggedin and not c.allow_loggedin_cache:
            response.headers["Cache-Control"] = "no-cache"
            response.headers["Pragma"] = "no-cache"

        if c.deny_frames:
            response.headers["X-Frame-Options"] = "DENY"

        # return
        # set content cache
        if (
            g.page_cache_time
            and request.method.upper() == "GET"
            and (not c.user_is_loggedin or c.allow_loggedin_cache)
            and not c.used_cache
            and response.status_code not in (429, 503)
            and response.content
            and response.content[0]
        ):
            try:
                g.rendercache.set(self.request_key(), (response, c.cookies), g.page_cache_time)
            except MemcachedError as e:
                # this codepath will actually never be hit as long as
                # the pagecache memcached client is in no_reply mode.
                g.log.warning("Ignored exception (%r) on pagecache " "write for %r", e, request.path)

        # send cookies
        for k, v in c.cookies.iteritems():
            if v.dirty:
                response.set_cookie(
                    key=k,
                    value=quote(v.value),
                    domain=v.domain,
                    expires=v.expires,
                    secure=getattr(v, "secure", False),
                    httponly=getattr(v, "httponly", False),
                )

        end_time = datetime.now(g.tz)

        # update last_visit
        if c.user_is_loggedin and not g.disallow_db_writes and request.path != "/validuser":
            c.user.update_last_visit(c.start_time)

        if "pylons.routes_dict" in request.environ and "action" in request.environ["pylons.routes_dict"]:
            action = str(request.environ["pylons.routes_dict"]["action"])
        else:
            action = "unknown"
            log_text("unknown action", "no action for %r" % path_info, "warning")
        if g.usage_sampling >= 1.0 or rand.random() < g.usage_sampling:

            amqp.add_kw(
                "usage_q", start_time=c.start_time, end_time=end_time, sampling_rate=g.usage_sampling, action=action
            )

        check_request(end_time)

        # this thread is probably going to be reused, but it could be
        # a while before it is. So we might as well dump the cache in
        # the mean time so that we don't have dead objects hanging
        # around taking up memory
        g.reset_caches()

        # push data to statsd
        if "pylons.action_method" in request.environ:
            # only report web timing data if an action handler was called
            g.stats.transact("web.%s" % action, (end_time - c.start_time).total_seconds())
        g.stats.flush()
Example #18
def describe_jobflows(emr_connection, _update=False):
    g.reset_caches()
    jobflows = describe_jobflows_cached(emr_connection, _update=_update)
    return jobflows
Example #19
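    # (tail of a helper elided by this excerpt; judging from the loop
    # below, probably the _keep(msg, account) predicate)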
    return True


resume_id = long(sys.argv[1]) if len(sys.argv) > 1 else None

msg_accounts = Account._query(sort=desc("_date"), data=True)

if resume_id:
    msg_accounts._filter(Account.c._id < resume_id)

for account in progress(fetch_things2(msg_accounts), estimate=resume_id):
    current_inbox_count = account.inbox_count
    unread_messages = list(queries.get_unread_inbox(account))

    if account._id % 100000 == 0:
        g.reset_caches()

    if not len(unread_messages):
        if current_inbox_count:
            account._incr('inbox_count', -current_inbox_count)
    else:
        msgs = Message._by_fullname(
            unread_messages,
            data=True,
            return_dict=False,
            ignore_missing=True,
        )
        kept_msgs = sum(1 for msg in msgs if _keep(msg, account))

        if kept_msgs or current_inbox_count:
            account._incr('inbox_count', kept_msgs - current_inbox_count)
Example #20
def handle_items(queue, callback, ack=True, limit=1, min_size=0, drain=False, verbose=True, sleep_time=1):
    """Call callback() on every item in a particular queue. If the
    connection to the queue is lost, it will die. Intended to be
    used as a long-running process."""
    if limit < min_size:
        raise ValueError("min_size must be less than limit")
    from pylons import c
    from raven import Client

    if "sentry_dsn" in g.config:
        raven_client = Client(g.config["sentry_dsn"])
    else:
        raven_client = None

    chan = connection_manager.get_channel()
    countdown = None

    while True:
        # NB: None != 0, so we don't need an "is not None" check here
        if countdown == 0:
            break

        msg = chan.basic_get(queue)
        if not msg and drain:
            return
        elif not msg:
            time.sleep(sleep_time)
            continue

        if countdown is None and drain and "message_count" in msg.delivery_info:
            countdown = 1 + msg.delivery_info["message_count"]

        g.reset_caches()
        c.use_write_db = {}

        items = [msg]

        while countdown != 0:
            if countdown is not None:
                countdown -= 1
            if len(items) >= limit:
                break  # the innermost loop only
            msg = chan.basic_get(queue)
            if msg is None:
                if len(items) < min_size:
                    time.sleep(sleep_time)
                else:
                    break
            else:
                items.append(msg)

        try:
            count_str = ""
            if "message_count" in items[-1].delivery_info:
                # the count from the last message, if the count is
                # available
                count_str = "(%d remaining)" % items[-1].delivery_info["message_count"]
            if verbose:
                print "%s: %d items %s" % (queue, len(items), count_str)
            callback(items, chan)

            if ack:
                # ack *all* outstanding messages
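                # (ALL_READ_MESSAGES is presumably the literal 0 that
                # the earlier revision in Example #3 passes here;
                # delivery tag 0 with multiple=True acks everything
                # outstanding on the channel)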
                chan.basic_ack(ALL_READ_MESSAGES, multiple=True)

            # flush any log messages printed by the callback
            sys.stdout.flush()
        except:
            if raven_client:
                raven_client.captureException()
            for item in items:
                # explicitly reject the items that we've not processed
                chan.basic_reject(item.delivery_tag, requeue=True)
            raise
Example #21
def pushup_permacache(verbosity=1000):
    """When putting cassandra into the permacache chain, we need to
       push everything up into the rest of the chain, so this is
       everything that uses the permacache, as of that check-in."""
    from pylons import g
    from r2.models import Link, Subreddit, Account
    from r2.lib.db.operators import desc
    from r2.lib.comment_tree import comments_key, messages_key
    from r2.lib.utils import fetch_things2, in_chunks
    from r2.lib.utils import last_modified_key
    from r2.lib.promote import promoted_memo_key
    from r2.lib.subreddit_search import load_all_reddits
    from r2.lib.db import queries
    from r2.lib.cache import CassandraCacheChain

    authority = g.permacache.caches[-1]
    nonauthority = CassandraCacheChain(g.permacache.caches[1:-1])

    def populate(keys):
        vals = authority.simple_get_multi(keys)
        if vals:
            nonauthority.set_multi(vals)

    def gen_keys():
        yield promoted_memo_key

        # just let this one do its own writing
        load_all_reddits()

        yield queries.get_all_comments().iden

        l_q = Link._query(Link.c._spam == (True, False),
                          Link.c._deleted == (True, False),
                          sort=desc('_date'),
                          data=True,
                          )
        for link in fetch_things2(l_q, verbosity):
            yield comments_key(link._id)
            yield last_modified_key(link, 'comments')

        a_q = Account._query(Account.c._spam == (True, False),
                             sort=desc('_date'),
                             )
        for account in fetch_things2(a_q, verbosity):
            yield messages_key(account._id)
            yield last_modified_key(account, 'overview')
            yield last_modified_key(account, 'commented')
            yield last_modified_key(account, 'submitted')
            yield last_modified_key(account, 'liked')
            yield last_modified_key(account, 'disliked')
            yield queries.get_comments(account, 'new', 'all').iden
            yield queries.get_submitted(account, 'new', 'all').iden
            yield queries.get_liked(account).iden
            yield queries.get_disliked(account).iden
            yield queries.get_hidden(account).iden
            yield queries.get_saved(account).iden
            yield queries.get_inbox_messages(account).iden
            yield queries.get_unread_messages(account).iden
            yield queries.get_inbox_comments(account).iden
            yield queries.get_unread_comments(account).iden
            yield queries.get_inbox_selfreply(account).iden
            yield queries.get_unread_selfreply(account).iden
            yield queries.get_sent(account).iden

        sr_q = Subreddit._query(Subreddit.c._spam == (True, False),
                                sort=desc('_date'),
                                )
        for sr in fetch_things2(sr_q, verbosity):
            yield last_modified_key(sr, 'stylesheet_contents')
            yield queries.get_links(sr, 'hot', 'all').iden
            yield queries.get_links(sr, 'new', 'all').iden

            for sort in 'top', 'controversial':
                for time in 'hour', 'day', 'week', 'month', 'year', 'all':
                    yield queries.get_links(sr, sort, time,
                                            merge_batched=False).iden
            yield queries.get_spam_links(sr).iden
            yield queries.get_spam_comments(sr).iden
            yield queries.get_reported_links(sr).iden
            yield queries.get_reported_comments(sr).iden
            yield queries.get_subreddit_messages(sr).iden
            yield queries.get_unread_subreddit_messages(sr).iden

    done = 0
    for keys in in_chunks(gen_keys(), verbosity):
        g.reset_caches()
        done += len(keys)
        print 'Done %d: %r' % (done, keys[-1])
        populate(keys)
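
A note on the batching above: in_chunks() from r2.lib.utils is assumed
to group an iterator into lists of at most verbosity items (chunking
range(5) by 2 gives [0, 1], [2, 3], [4]), so each pass resets the
in-process caches, fetches one bounded batch of keys from the
authoritative cache, and pushes it up the rest of the chain.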
Example #22
    def post(self):
        response = c.response
        content = filter(None, response.content)
        if isinstance(content, (list, tuple)):
            content = ''.join(content)
        for w in c.response_wrappers:
            content = w(content)
        response.content = content
        if c.response_content_type:
            response.headers['Content-Type'] = c.response_content_type

        if c.user_is_loggedin and not c.allow_loggedin_cache:
            response.headers['Cache-Control'] = 'no-cache'
            response.headers['Pragma'] = 'no-cache'

        if c.deny_frames:
            response.headers["X-Frame-Options"] = "DENY"

        #return
        #set content cache
        if (g.page_cache_time
            and request.method.upper() == 'GET'
            and (not c.user_is_loggedin or c.allow_loggedin_cache)
            and not c.used_cache
            and response.status_code not in (429, 503)
            and response.content and response.content[0]):
            try:
                g.rendercache.set(self.request_key(),
                                  (response, c.cookies),
                                  g.page_cache_time)
            except MemcachedError:
                # the key was too big to set in the rendercache
                g.log.debug("Ignored too-big render cache")

        # send cookies
        for k,v in c.cookies.iteritems():
            if v.dirty:
                response.set_cookie(key     = k,
                                    value   = quote(v.value),
                                    domain  = v.domain,
                                    expires = v.expires)

        end_time = datetime.now(g.tz)

        if ('pylons.routes_dict' in request.environ and
            'action' in request.environ['pylons.routes_dict']):
            action = str(request.environ['pylons.routes_dict']['action'])
        else:
            action = "unknown"
            log_text("unknown action", "no action for %r" % path_info,
                     "warning")
        if g.usage_sampling >= 1.0 or rand.random() < g.usage_sampling:

            amqp.add_kw("usage_q",
                        start_time = c.start_time,
                        end_time = end_time,
                        sampling_rate = g.usage_sampling,
                        action = action)

        check_request(end_time)

        # this thread is probably going to be reused, but it could be
        # a while before it is. So we might as well dump the cache in
        # the mean time so that we don't have dead objects hanging
        # around taking up memory
        g.reset_caches()

        # push data to statsd
        if 'pylons.action_method' in request.environ:
            # only report web timing data if an action handler was called
            g.stats.transact('web.%s' % action,
                             (end_time - c.start_time).total_seconds())
        g.stats.flush_timing_stats()
Example #23
    def post(self):
        c.request_timer.intermediate("action")

        response = c.response
        content = filter(None, response.content)
        if isinstance(content, (list, tuple)):
            content = ''.join(content)
        for w in c.response_wrappers:
            content = w(content)
        response.content = content
        if c.response_content_type:
            response.headers['Content-Type'] = c.response_content_type

        if c.user_is_loggedin and not c.allow_loggedin_cache:
            response.headers['Cache-Control'] = 'no-cache'
            response.headers['Pragma'] = 'no-cache'

        if c.deny_frames:
            response.headers["X-Frame-Options"] = "DENY"

        #return
        #set content cache
        if (g.page_cache_time and request.method.upper() == 'GET'
                and (not c.user_is_loggedin or c.allow_loggedin_cache)
                and not c.used_cache and response.status_code not in (429, 503)
                and response.content and response.content[0]):
            try:
                g.rendercache.set(self.request_key(), (response, c.cookies),
                                  g.page_cache_time)
            except MemcachedError as e:
                # this codepath will actually never be hit as long as
                # the pagecache memcached client is in no_reply mode.
                g.log.warning(
                    "Ignored exception (%r) on pagecache "
                    "write for %r", e, request.path)

        # send cookies
        for k, v in c.cookies.iteritems():
            if v.dirty:
                response.set_cookie(key=k,
                                    value=quote(v.value),
                                    domain=v.domain,
                                    expires=v.expires,
                                    secure=getattr(v, 'secure', False),
                                    httponly=getattr(v, 'httponly', False))

        end_time = datetime.now(g.tz)

        # update last_visit
        if (c.user_is_loggedin and not g.disallow_db_writes
                and request.method.upper() != "POST"
                and not c.dont_update_last_visit
                and request.path != '/validuser'):
            c.user.update_last_visit(c.start_time)

        check_request(end_time)

        # this thread is probably going to be reused, but it could be
        # a while before it is. So we might as well dump the cache in
        # the mean time so that we don't have dead objects hanging
        # around taking up memory
        g.reset_caches()

        # push data to statsd
        c.request_timer.stop()
        g.stats.flush()
Example #24
    def post(self):
        response = c.response
        content = filter(None, response.content)
        if isinstance(content, (list, tuple)):
            content = ''.join(content)
        for w in c.response_wrappers:
            content = w(content)
        response.content = content
        if c.response_content_type:
            response.headers['Content-Type'] = c.response_content_type
        if c.response_access_control:
            c.response.headers['Access-Control'] = c.response_access_control

        if c.user_is_loggedin and not c.allow_loggedin_cache:
            response.headers['Cache-Control'] = 'no-cache'
            response.headers['Pragma'] = 'no-cache'

        #return
        #set content cache
        if (g.page_cache_time
            and request.method.upper() == 'GET'
            and (not c.user_is_loggedin or c.allow_loggedin_cache)
            and not c.used_cache
            and response.status_code != 503
            and response.content and response.content[0]):
            try:
                g.rendercache.set(self.request_key(),
                                  (response, c.cookies),
                                  g.page_cache_time)
            except MemcachedError:
                # the key was too big to set in the rendercache
                g.log.debug("Ignored too-big render cache")

        # send cookies
        for k,v in c.cookies.iteritems():
            if v.dirty:
                response.set_cookie(key     = k,
                                    value   = quote(v.value),
                                    domain  = v.domain,
                                    expires = v.expires)

        if g.usage_sampling <= 0.0:
            return

        if g.usage_sampling >= 1.0 or rand.random() < g.usage_sampling:
            if ('pylons.routes_dict' in request.environ and
                'action' in request.environ['pylons.routes_dict']):
                action = str(request.environ['pylons.routes_dict']['action'])
            else:
                action = "unknown"
                log_text("unknown action",
                         "no action for %r" % path_info,
                         "warning")

            amqp.add_kw("usage_q",
                        start_time = c.start_time,
                        end_time = datetime.now(g.tz),
                        sampling_rate = g.usage_sampling,
                        action = action)

        # this thread is probably going to be reused, but it could be
        # a while before it is. So we might as well dump the cache in
        # the mean time so that we don't have dead objects hanging
        # around taking up memory
        g.reset_caches()
Example #25
def update_timer():
    while True:
        g.reset_caches()
        _update_timer()
        sleep(UPDATE_INTERVAL_SECONDS)
Example #26
def describe_jobflows_by_state(emr_connection, states, _update=False):
    g.reset_caches()
    jobflows = describe_jobflows_cached(emr_connection, _update=_update)
    return [jf for jf in jobflows if jf.state in states]
Example #27
    def post(self):
        c.request_timer.intermediate("action")

        # if the action raised an HTTPException (i.e. it aborted) then pylons
        # will have replaced response with the exception itself.
        c.is_exception_response = getattr(response, "_exception", False)

        if c.response_wrapper and not c.is_exception_response:
            content = flatten_response(response.content)
            wrapped_content = c.response_wrapper(content)
            response.content = wrapped_content

        if c.user_is_loggedin and not c.allow_loggedin_cache:
            response.headers['Cache-Control'] = 'no-cache'
            response.headers['Pragma'] = 'no-cache'

        if c.deny_frames:
            response.headers["X-Frame-Options"] = "DENY"

        # save the result of this page to the pagecache if possible.  we
        # mustn't cache things that rely on state not tracked by request_key
        # such as If-Modified-Since headers for 304s or requesting IP for 429s.
        if (g.page_cache_time
            and request.method.upper() == 'GET'
            and c.can_use_pagecache
            and not c.used_cache
            and response.status_int not in (304, 429)
            and not response.status.startswith("5")
            and not c.is_exception_response):
            try:
                g.pagecache.set(self.request_key(),
                                (response._current_obj(), c.cookies),
                                g.page_cache_time)
            except MemcachedError as e:
                # this codepath will actually never be hit as long as
                # the pagecache memcached client is in no_reply mode.
                g.log.warning("Ignored exception (%r) on pagecache "
                              "write for %r", e, request.path)

        pragmas = [p.strip() for p in
                   request.headers.get("Pragma", "").split(",")]
        if g.debug or "x-reddit-pagecache" in pragmas:
            if c.can_use_pagecache:
                pagecache_state = "hit" if c.used_cache else "miss"
            else:
                pagecache_state = "disallowed"
            response.headers["X-Reddit-Pagecache"] = pagecache_state

        # send cookies
        for k, v in c.cookies.iteritems():
            if v.dirty:
                response.set_cookie(key=k,
                                    value=quote(v.value),
                                    domain=v.domain,
                                    expires=v.expires,
                                    secure=getattr(v, 'secure', False),
                                    httponly=getattr(v, 'httponly', False))

        if self.should_update_last_visit():
            c.user.update_last_visit(c.start_time)

        hooks.get_hook("reddit.request.end").call()

        # this thread is probably going to be reused, but it could be
        # a while before it is. So we might as well dump the cache in
        # the mean time so that we don't have dead objects hanging
        # around taking up memory
        g.reset_caches()

        c.request_timer.intermediate("post")

        # push data to statsd
        c.request_timer.stop()
        g.stats.flush()
Example #28
    def post(self):
        response = c.response
        content = filter(None, response.content)
        if isinstance(content, (list, tuple)):
            content = ''.join(content)
        for w in c.response_wrappers:
            content = w(content)
        response.content = content
        if c.response_content_type:
            response.headers['Content-Type'] = c.response_content_type
        if c.response_access_control:
            c.response.headers['Access-Control'] = c.response_access_control

        if c.user_is_loggedin and not c.allow_loggedin_cache:
            response.headers['Cache-Control'] = 'no-cache'
            response.headers['Pragma'] = 'no-cache'

        #return
        #set content cache
        if (g.page_cache_time
            and request.method.upper() == 'GET'
            and (not c.user_is_loggedin or c.allow_loggedin_cache)
            and not c.used_cache
            and response.status_code != 503
            and response.content and response.content[0]):
            try:
                g.rendercache.set(self.request_key(),
                                  (response, c.cookies),
                                  g.page_cache_time)
            except MemcachedError:
                # the key was too big to set in the rendercache
                g.log.debug("Ignored too-big render cache")

        # send cookies
        for k,v in c.cookies.iteritems():
            if v.dirty:
                response.set_cookie(key     = k,
                                    value   = quote(v.value),
                                    domain  = v.domain,
                                    expires = v.expires)

        if g.logans_run_limit:
            if c.start_time > g.logans_run_limit and not g.shutdown:
                g.log.info("Time to restart. It's been an honor serving with you.")
                g.shutdown = 'init'

        if g.usage_sampling <= 0.0:
            return

        if g.usage_sampling >= 1.0 or rand.random() < g.usage_sampling:
            if ('pylons.routes_dict' in request.environ and
                'action' in request.environ['pylons.routes_dict']):
                action = str(request.environ['pylons.routes_dict']['action'])
            else:
                action = "unknown"
                log_text("unknown action",
                         "no action for %r" % path_info,
                         "warning")

            amqp.add_kw("usage_q",
                        start_time = c.start_time,
                        end_time = datetime.now(g.tz),
                        sampling_rate = g.usage_sampling,
                        action = action)

        # this thread is probably going to be reused, but it could be
        # a while before it is. So we might as well dump the cache in
        # the mean time so that we don't have dead objects hanging
        # around taking up memory
        g.reset_caches()
Example #29
    def post(self):
        c.request_timer.intermediate("action")

        # if the action raised an HTTPException (i.e. it aborted) then pylons
        # will have replaced response with the exception itself.
        c.is_exception_response = getattr(response, "_exception", False)

        if c.response_wrapper and not c.is_exception_response:
            content = flatten_response(response.content)
            wrapped_content = c.response_wrapper(content)
            response.content = wrapped_content

        if c.user_is_loggedin and not c.allow_loggedin_cache:
            response.headers['Cache-Control'] = 'no-cache'
            response.headers['Pragma'] = 'no-cache'

        if c.deny_frames:
            response.headers["X-Frame-Options"] = "DENY"

        #set content cache
        if (g.page_cache_time
            and request.method.upper() == 'GET'
            and (not c.user_is_loggedin or c.allow_loggedin_cache)
            and not c.used_cache
            and response.status_int != 429
            and not response.status.startswith("5")
            and not c.is_exception_response):
            try:
                g.pagecache.set(self.request_key(),
                                (response._current_obj(), c.cookies),
                                g.page_cache_time)
            except MemcachedError as e:
                # this codepath will actually never be hit as long as
                # the pagecache memcached client is in no_reply mode.
                g.log.warning("Ignored exception (%r) on pagecache "
                              "write for %r", e, request.path)

        # send cookies
        for k, v in c.cookies.iteritems():
            if v.dirty:
                response.set_cookie(key=k,
                                    value=quote(v.value),
                                    domain=v.domain,
                                    expires=v.expires,
                                    secure=getattr(v, 'secure', False),
                                    httponly=getattr(v, 'httponly', False))

        if self.should_update_last_visit():
            c.user.update_last_visit(c.start_time)

        hooks.get_hook("reddit.request.end").call()

        # this thread is probably going to be reused, but it could be
        # a while before it is. So we might as well dump the cache in
        # the meantime so that we don't have dead objects hanging
        # around taking up memory
        g.reset_caches()

        c.request_timer.intermediate("post")

        # push data to statsd
        c.request_timer.stop()
        g.stats.flush()
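
The request_timer calls in this post() close out the marks begun in the
matching pre(): each intermediate() appears to record the span since the
previous mark, and stop() ends the request. A plausible sketch of that
interface, using a hypothetical IntermediateTimer rather than the real
r2 stats class:

import time

class IntermediateTimer(object):
    """Hypothetical stand-in for the object returned by
    g.stats.get_timer(): each intermediate() records the time elapsed
    since the previous mark under the given name."""

    def __init__(self):
        self.spans = {}
        self._last = None

    def start(self):
        self._last = time.time()

    def intermediate(self, name):
        now = time.time()
        self.spans[name] = now - self._last
        self._last = now

    def stop(self):
        self.intermediate('stop')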
Exemplo n.º 39
0
def describe_jobflows(emr_connection, _update=False):
    g.reset_caches()
    jobflows = describe_jobflows_cached(emr_connection, _update=_update)
    return jobflows
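
Resetting the caches before calling the cached helper keeps a stale
in-process entry from shadowing a refresh. A minimal sketch of a
memoizing decorator with the _update-style refresh keyword this pattern
relies on (the decorator and the decorated function below are
stand-ins, not r2.lib.memoize):

def memoize(fn):
    # cache on the positional args; recompute when _update=True
    cache = {}
    def wrapper(*args, **kw):
        if kw.pop('_update', False) or args not in cache:
            cache[args] = fn(*args, **kw)
        return cache[args]
    return wrapper

@memoize
def describe_jobflows_cached(emr_connection):
    return emr_connection.describe_jobflows()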
Exemplo n.º 41
0
def pushup_permacache(verbosity=1000):
    """When putting cassandra into the permacache chain, we need to
       push everything up into the rest of the chain, so this is
       everything that uses the permacache, as of that check-in."""
    from pylons import g
    from r2.models import Link, Subreddit, Account
    from r2.lib.db.operators import desc
    from r2.lib.comment_tree import comments_key, messages_key
    from r2.lib.utils import fetch_things2, in_chunks
    from r2.lib.utils import last_modified_key
    from r2.lib.promote import promoted_memo_key
    from r2.lib.subreddit_search import load_all_reddits
    from r2.lib.db import queries
    from r2.lib.cache import CassandraCacheChain

    authority = g.permacache.caches[-1]
    nonauthority = CassandraCacheChain(g.permacache.caches[1:-1])

    def populate(keys):
        vals = authority.simple_get_multi(keys)
        if vals:
            nonauthority.set_multi(vals)

    def gen_keys():
        yield promoted_memo_key

        # just let this one do its own writing
        load_all_reddits()

        yield queries.get_all_comments().iden

        l_q = Link._query(Link.c._spam == (True, False),
                          Link.c._deleted == (True, False),
                          sort=desc('_date'),
                          data=True,
                          )
        for link in fetch_things2(l_q, verbosity):
            yield comments_key(link._id)
            yield last_modified_key(link, 'comments')

        a_q = Account._query(Account.c._spam == (True, False),
                             sort=desc('_date'),
                             )
        for account in fetch_things2(a_q, verbosity):
            yield messages_key(account._id)
            yield last_modified_key(account, 'overview')
            yield last_modified_key(account, 'commented')
            yield last_modified_key(account, 'submitted')
            yield last_modified_key(account, 'liked')
            yield last_modified_key(account, 'disliked')
            yield queries.get_comments(account, 'new', 'all').iden
            yield queries.get_submitted(account, 'new', 'all').iden
            yield queries.get_liked(account).iden
            yield queries.get_disliked(account).iden
            yield queries.get_hidden(account).iden
            yield queries.get_saved(account).iden
            yield queries.get_inbox_messages(account).iden
            yield queries.get_unread_messages(account).iden
            yield queries.get_inbox_comments(account).iden
            yield queries.get_unread_comments(account).iden
            yield queries.get_inbox_selfreply(account).iden
            yield queries.get_unread_selfreply(account).iden
            yield queries.get_sent(account).iden

        sr_q = Subreddit._query(Subreddit.c._spam == (True, False),
                                sort=desc('_date'),
                                )
        for sr in fetch_things2(sr_q, verbosity):
            yield last_modified_key(sr, 'stylesheet_contents')
            yield queries.get_links(sr, 'hot', 'all').iden
            yield queries.get_links(sr, 'new', 'all').iden

            for sort in 'top', 'controversial':
                for time in 'hour', 'day', 'week', 'month', 'year', 'all':
                    yield queries.get_links(sr, sort, time,
                                            merge_batched=False).iden
            yield queries.get_spam_links(sr).iden
            yield queries.get_spam_comments(sr).iden
            yield queries.get_reported_links(sr).iden
            yield queries.get_reported_comments(sr).iden
            yield queries.get_subreddit_messages(sr).iden
            yield queries.get_unread_subreddit_messages(sr).iden

    done = 0
    for keys in in_chunks(gen_keys(), verbosity):
        g.reset_caches()
        done += len(keys)
        print 'Done %d: %r' % (done, keys[-1])
        populate(keys)
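
The in_chunks helper used by the driver loop just batches an iterator.
A minimal sketch of its behavior (the real implementation lives in
r2.lib.utils and may differ):

from itertools import islice

def in_chunks(it, size):
    # yield successive lists of up to `size` items from any iterator
    it = iter(it)
    while True:
        chunk = list(islice(it, size))
        if not chunk:
            break
        yield chunk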
Exemplo n.º 42
0
resume_id = long(sys.argv[1]) if len(sys.argv) > 1 else None

msg_accounts = Account._query(sort=desc("_date"), data=True)

if resume_id:
    msg_accounts._filter(Account.c._id < resume_id)

for account in progress(fetch_things2(msg_accounts), estimate=resume_id):
    current_inbox_count = account.inbox_count
    unread_messages = list(queries.get_unread_inbox(account))

    if account._id % 100000 == 0:
        g.reset_caches()

    if not len(unread_messages):
        if current_inbox_count:
            account._incr('inbox_count', -current_inbox_count)
    else:
        msgs = Message._by_fullname(
            unread_messages,
            data=True,
            return_dict=False,
            ignore_missing=True,
        )
        kept_msgs = sum(1 for msg in msgs if _keep(msg, account))

        if kept_msgs or current_inbox_count:
            account._incr('inbox_count', kept_msgs - current_inbox_count)
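
The final increment reconciles the stored counter with the recount;
with hypothetical numbers:

# e.g. stored inbox_count = 7, but only 5 unread messages survive _keep:
# _incr('inbox_count', 5 - 7) nudges the counter down by 2, to 5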
Exemplo n.º 43
0
def batch_fn(items):
    g.reset_caches()
    return items
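
A hedged usage sketch: batch_fn is meant to be handed to a chunked
iterator such as fetch_things2, so the caches are dropped once per
batch rather than once per item (the query and the per-item work here
are hypothetical):

q = Link._query(sort=desc('_date'), data=True)
for link in fetch_things2(q, chunk_size=1000, batch_fn=batch_fn):
    process(link)  # hypothetical per-item work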
Exemplo n.º 45
0
    def post(self):
        response = c.response
        content = filter(None, response.content)
        if isinstance(content, (list, tuple)):
            content = "".join(content)
        for w in c.response_wrappers:
            content = w(content)
        response.content = content
        if c.response_content_type:
            response.headers["Content-Type"] = c.response_content_type

        if c.user_is_loggedin and not c.allow_loggedin_cache:
            response.headers["Cache-Control"] = "no-cache"
            response.headers["Pragma"] = "no-cache"

        if c.deny_frames:
            response.headers["X-Frame-Options"] = "DENY"

        # set content cache
        if (
            g.page_cache_time
            and request.method.upper() == "GET"
            and (not c.user_is_loggedin or c.allow_loggedin_cache)
            and not c.used_cache
            and response.status_code != 503
            and response.content
            and response.content[0]
        ):
            try:
                g.rendercache.set(self.request_key(), (response, c.cookies), g.page_cache_time)
            except MemcachedError:
                # the key was too big to set in the rendercache
                g.log.debug("Ignored too-big render cache")

        # send cookies
        for k, v in c.cookies.iteritems():
            if v.dirty:
                response.set_cookie(key=k, value=quote(v.value), domain=v.domain, expires=v.expires)

        if g.usage_sampling <= 0.0:
            return

        if g.usage_sampling >= 1.0 or rand.random() < g.usage_sampling:
            if "pylons.routes_dict" in request.environ and "action" in request.environ["pylons.routes_dict"]:
                action = str(request.environ["pylons.routes_dict"]["action"])
            else:
                action = "unknown"
                log_text("unknown action", "no action for %r" % path_info, "warning")

            amqp.add_kw(
                "usage_q",
                start_time=c.start_time,
                end_time=datetime.now(g.tz),
                sampling_rate=g.usage_sampling,
                action=action,
            )

        # this thread is probably going to be reused, but it could be
        # a while before it is. So we might as well dump the cache in
        # the meantime so that we don't have dead objects hanging
        # around taking up memory
        g.reset_caches()