Пример #1
0
def context_to_string(ctx):
    """
    Build a diagnostic string for a context encoded as an integer.

    Intended for debug output.  The state name always comes first, followed
    by the names of the element type, attribute type, delimiter type, JS
    context and URL part -- each of those only when its extracted value is
    truthy.

    ctx - The integer-encoded context to describe.

    Returns a string of the form "[Context NAME NAME ...]".
    """
    state_value = context.state_of(ctx)
    # Each optional field is paired with the table that names its values.
    optional_fields = (
        (context.element_type_of(ctx), _ELEMENT_NAMES),
        (context.attr_type_of(ctx), _ATTR_NAMES),
        (context.delim_type_of(ctx), _DELIM_NAMES),
        (context.js_ctx_of(ctx), _JS_CTX_NAMES),
        (context.url_part_of(ctx), _URL_PART_NAMES),
    )

    labels = [_STATE_NAMES[state_value]]
    # A falsy field value is kept as-is (and filtered out below) instead of
    # being looked up in its table.
    labels.extend(value and names[value] for value, names in optional_fields)

    # Falsy entries are dropped by the filter before the 'UNKNOWN' fallback
    # is applied.
    text = " ".join([label or 'UNKNOWN' for label in labels if label])
    return "[Context %s]" % text
Пример #2
0
def context_to_string(ctx):
    """
    Describe an integer-encoded context as a readable string for debugging.

    The result starts with the name of the context's state.  The element
    type, attribute type, delimiter type, JS context and URL part are each
    appended by name only when the corresponding extracted value is truthy.

    ctx - The integer-encoded context to describe.

    Returns a string of the form "[Context NAME NAME ...]".
    """
    state_value = context.state_of(ctx)
    # (extracted value, table naming that kind of value), in output order.
    extracted = [
        (context.element_type_of(ctx), _ELEMENT_NAMES),
        (context.attr_type_of(ctx), _ATTR_NAMES),
        (context.delim_type_of(ctx), _DELIM_NAMES),
        (context.js_ctx_of(ctx), _JS_CTX_NAMES),
        (context.url_part_of(ctx), _URL_PART_NAMES),
    ]

    candidates = [_STATE_NAMES[state_value]]
    for field_value, name_table in extracted:
        # Only truthy values are looked up; a falsy value is carried through
        # unchanged so that the filter below discards it.
        candidates.append(field_value and name_table[field_value])

    # The filter removes falsy candidates before the 'UNKNOWN' fallback is
    # evaluated.
    shown = [name or 'UNKNOWN' for name in candidates if name]
    return "[Context %s]" % " ".join(shown)
Пример #3
0
    def test_is_regex_preceder(self):
        """
        Test the heuristic that is used to update JS_CTX_*.

        Each table entry pairs a run of JavaScript source with the JS context
        (regex vs. division operator) expected after it.  Every entry is
        checked from several starting contexts; the state bits of the
        starting context are expected to carry over into the result.
        Whitespace-only input is then checked to leave the context unchanged.
        """
        tests = (
            # Statement terminators precede regexps.
            (context.JS_CTX_REGEX, ";"),
            # This is not airtight.
            #     ({ valueOf: function () { return 1 } } / 2)
            # is valid JavaScript but in practice, devs do not do this.
            # A block followed by a statement starting with a RegExp is
            # much more common:
            #     while (x) {...} /foo/.test(x) || panic()
            (context.JS_CTX_REGEX, "}"),
            # But member, call, grouping, and array expression terminators
            # precede div ops.
            (context.JS_CTX_DIV_OP, ")"),
            (context.JS_CTX_DIV_OP, "]"),
            # At the start of a primary expression, array, or expression
            # statement, expect a regexp.
            (context.JS_CTX_REGEX, "("),
            (context.JS_CTX_REGEX, "["),
            (context.JS_CTX_REGEX, "{"),
            # Assignment operators precede regexps as do all exclusively
            # prefix and binary operators.
            (context.JS_CTX_REGEX, "="),
            (context.JS_CTX_REGEX, "+="),
            (context.JS_CTX_REGEX, "*="),
            (context.JS_CTX_REGEX, "*"),
            (context.JS_CTX_REGEX, "!"),
            # Whether the + or - is infix or prefix, it cannot precede a
            # div op.
            (context.JS_CTX_REGEX, "+"),
            (context.JS_CTX_REGEX, "-"),
            # An incr/decr op precedes a div operator.
            # This is not airtight. In (g = ++/h/i) a regexp follows a
            # pre-increment operator, but in practice devs do not try to
            # increment or decrement regular expressions.
            # (g++/h/i) where ++ is a postfix operator on g is much more
            # common.
            (context.JS_CTX_DIV_OP, "--"),
            (context.JS_CTX_DIV_OP, "++"),
            (context.JS_CTX_DIV_OP, "x--"),
            # When we have many dashes or pluses, then they are grouped
            # left to right.
            (context.JS_CTX_REGEX, "x---"),  # A postfix -- then a -.
            # return followed by a slash returns the regexp literal or the
            # slash starts a regexp literal in an expression statement that
            # is dead code.
            (context.JS_CTX_REGEX, "return"),
            (context.JS_CTX_REGEX, "return "),
            (context.JS_CTX_REGEX, "return\t"),
            (context.JS_CTX_REGEX, "return\n"),
            (context.JS_CTX_REGEX, u"return\u2028"),
            # Identifiers can be divided and cannot validly precede a
            # regular expression. Semicolon insertion cannot happen
            # between an identifier and a regular expression on a new line
            # because the one token lookahead for semicolon insertion has
            # to conclude that it could be a div binary op and treat it as
            # such.
            (context.JS_CTX_DIV_OP, "x"),
            (context.JS_CTX_DIV_OP, "x "),
            (context.JS_CTX_DIV_OP, "x\t"),
            (context.JS_CTX_DIV_OP, "x\n"),
            (context.JS_CTX_DIV_OP, u"x\u2028"),
            (context.JS_CTX_DIV_OP, "preturn"),
            # Numbers precede div ops.
            (context.JS_CTX_DIV_OP, "0"),
            # Dots that are part of a number are div preceders.
            (context.JS_CTX_DIV_OP, "0."),
            )

        for want_ctx, js_code in tests:
            # The expected JS context must not depend on the JS context we
            # start from; only the state bits of the start are carried over.
            for start in (context.JS_CTX_REGEX, context.JS_CTX_DIV_OP,
                          context.JS_CTX_DIV_OP | context.STATE_JS):
                got = js.next_js_ctx(js_code, start)
                want = want_ctx | context.state_of(start)
                # assertEqual rather than the assertEquals alias: the alias
                # is deprecated and was removed from unittest in Python 3.12.
                self.assertEqual(
                    want, got,
                    "%s: want %s got %s" % (
                        js_code,
                        debug.context_to_string(want),
                        debug.context_to_string(got)))

        # Input made up only of whitespace leaves the context as it was.
        self.assertEqual(
            context.STATE_JS | context.JS_CTX_REGEX,
            js.next_js_ctx("   ", context.STATE_JS | context.JS_CTX_REGEX),
            "Blank tokens")
        self.assertEqual(
            context.STATE_JS | context.JS_CTX_DIV_OP,
            js.next_js_ctx("   ", context.STATE_JS | context.JS_CTX_DIV_OP),
            "Blank tokens")
def esc_mode_for_hole(context_before):
    """
    Work out how an untrusted value interpolated at a hole must be escaped,
    and which context is in effect once the hole has been passed.

    context_before - The input context before the substitution.

    Returns a triple (context after, (escaping_modes...,), problem) where
    problem is None unless the hole sits in an ambiguous URL context, in
    which case it is a string describing that.  The context after is
    context.STATE_ERROR when the URL context is ambiguous or when no
    escaping mode is available for the hole.
    """
    ctx = context.force_epsilon_transition(context_before)
    state = context.state_of(ctx)
    url_part = context.url_part_of(ctx)
    modes = [ESC_MODE_FOR_STATE[state]]
    problem = None

    in_css_string = state in (
        context.STATE_CSSDQ_STR, context.STATE_CSSSQ_STR)

    if url_part == context.URL_PART_NONE:
        # Make sure that at the start of a URL, we filter out dangerous
        # protocols.
        at_url_start = state in (
            context.STATE_URL, context.STATE_CSS_URL,
            context.STATE_CSSDQ_URL, context.STATE_CSSSQ_URL)
        if at_url_start:
            modes = [ESC_MODE_FILTER_URL, ESC_MODE_NORMALIZE_URL]
        elif in_css_string:
            # Keep the state's own escaping mode, but filter first.
            modes.insert(0, ESC_MODE_FILTER_URL)
        if at_url_start or in_css_string:
            # After the hole we are past the start of the URL.
            ctx = (ctx & ~context.URL_PART_ALL) | context.URL_PART_PRE_QUERY
    elif url_part == context.URL_PART_PRE_QUERY:
        if not in_css_string:
            modes[0] = ESC_MODE_NORMALIZE_URL
    elif url_part == context.URL_PART_QUERY_OR_FRAG:
        modes[0] = ESC_MODE_ESCAPE_URL
    elif url_part == context.URL_PART_UNKNOWN:
        ctx = context.STATE_ERROR
        problem = 'hole appears in an ambiguous URL context'

    if state == context.STATE_JS:
        # Replace the JS context bits: after the hole a slash is treated as
        # a division operator.
        ctx = (ctx & ~context.JS_CTX_ALL) | context.JS_CTX_DIV_OP
    elif state == context.STATE_ATTR_NAME:
        if context.attr_type_of(ctx) != context.ATTR_NONE:
            modes[0] = ESC_MODE_FILTER_HTML_ATTR_SUFFIX

    # No usable escaping mode for this hole: the resulting context is the
    # error state.
    if modes[0] is None:
        ctx = context.STATE_ERROR

    delim_type = context.delim_type_of(ctx)
    if delim_type != context.DELIM_NONE:
        # Figure out how to escape the attribute value.
        if modes[-1] != ESC_MODE_ESCAPE_HTML_ATTRIBUTE:
            modes.append(ESC_MODE_ESCAPE_HTML_ATTRIBUTE)
        # The epsilon transition moved us into a space-or-tag-end delimited
        # value from a context that had no delimiter.
        if (context.delim_type_of(context_before) == context.DELIM_NONE
                and delim_type == context.DELIM_SPACE_OR_TAG_END):
            modes.append(ESC_MODE_OPEN_QUOTE)

    # If, for all x, f(g(x)) == g(x), we can skip f.  Each mode is compared
    # against the most recently kept mode.
    kept = [modes[0]]
    for mode in modes[1:]:
        if (kept[-1], mode) not in REDUNDANT_ESC_MODES:
            kept.append(mode)
    return ctx, tuple(kept), problem
def esc_mode_for_hole(context_before):
    """
    Given the context in which an untrusted value hole appears, determine
    the escaping modes that apply to the value and the context that follows
    the hole.

    context_before - The input context before the substitution.

    Returns a triple (context after, (escaping_modes...,), problem) where
    problem is None unless the hole sits in an ambiguous URL context, in
    which case it is a string describing that.  The context after is
    context.STATE_ERROR when the URL context is ambiguous or when no
    escaping mode is available for the hole.
    """
    ctx = context.force_epsilon_transition(context_before)
    state = context.state_of(ctx)
    url_part = context.url_part_of(ctx)
    modes = [ESC_MODE_FOR_STATE[state]]
    problem = None

    in_css_string = state in (
        context.STATE_CSSDQ_STR, context.STATE_CSSSQ_STR)

    if url_part == context.URL_PART_NONE:
        # Make sure that at the start of a URL, we filter out dangerous
        # protocols.
        at_url_start = state in (
            context.STATE_URL, context.STATE_CSS_URL,
            context.STATE_CSSDQ_URL, context.STATE_CSSSQ_URL)
        if at_url_start:
            modes = [ESC_MODE_FILTER_URL, ESC_MODE_NORMALIZE_URL]
        elif in_css_string:
            # Keep the state's own escaping mode, but filter first.
            modes.insert(0, ESC_MODE_FILTER_URL)
        if at_url_start or in_css_string:
            # After the hole we are past the start of the URL.
            ctx = (ctx & ~context.URL_PART_ALL) | context.URL_PART_PRE_QUERY
    elif url_part == context.URL_PART_PRE_QUERY:
        if not in_css_string:
            modes[0] = ESC_MODE_NORMALIZE_URL
    elif url_part == context.URL_PART_QUERY_OR_FRAG:
        modes[0] = ESC_MODE_ESCAPE_URL
    elif url_part == context.URL_PART_UNKNOWN:
        ctx = context.STATE_ERROR
        problem = 'hole appears in an ambiguous URL context'

    if state == context.STATE_JS:
        # Replace the JS context bits: after the hole a slash is treated as
        # a division operator.
        ctx = (ctx & ~context.JS_CTX_ALL) | context.JS_CTX_DIV_OP
    elif state == context.STATE_ATTR_NAME:
        if context.attr_type_of(ctx) != context.ATTR_NONE:
            modes[0] = ESC_MODE_FILTER_HTML_ATTR_SUFFIX

    # No usable escaping mode for this hole: the resulting context is the
    # error state.
    if modes[0] is None:
        ctx = context.STATE_ERROR

    delim_type = context.delim_type_of(ctx)
    if delim_type != context.DELIM_NONE:
        # Figure out how to escape the attribute value.
        if modes[-1] != ESC_MODE_ESCAPE_HTML_ATTRIBUTE:
            modes.append(ESC_MODE_ESCAPE_HTML_ATTRIBUTE)
        # The epsilon transition moved us into a space-or-tag-end delimited
        # value from a context that had no delimiter.
        if (context.delim_type_of(context_before) == context.DELIM_NONE
                and delim_type == context.DELIM_SPACE_OR_TAG_END):
            modes.append(ESC_MODE_OPEN_QUOTE)

    # If, for all x, f(g(x)) == g(x), we can skip f.  Each mode is compared
    # against the most recently kept mode.
    kept = [modes[0]]
    for mode in modes[1:]:
        if (kept[-1], mode) not in REDUNDANT_ESC_MODES:
            kept.append(mode)
    return ctx, tuple(kept), problem