Exemplo n.º 1
0
def lint_js_files_are_translated(files_to_lint):
    """Verify that nltext in the js files are marked for translation.

    See docstring of: _lint_js_content

    Returns:
       List of triples: (filename, lineno, error message)
    """
    # Make sure jsx files are compiled first, then we will lint the resulting
    # js.
    kake.make.build_many([
        ('genfiles/compiled_jsx/en/%s.js' % ka_root.relpath(f), {})
        for f in files_to_lint if f.endswith('.jsx')
    ])

    files_to_lint = lintutil.filter(files_to_lint, suffix=('.js', '.jsx'))
    for f in files_to_lint:
        abs_f = f
        f = ka_root.relpath(f)

        # Exclude files that we don't need to translate: we don't care
        # if those files are 'properly' marked up or not.
        if intl.english_only.should_not_translate_file(f):
            continue

        if f.endswith(".jsx"):
            abs_f = "%s/genfiles/compiled_jsx/en/%s.js" % (ka_root.root, f)
            f = ka_root.relpath(abs_f)

        file_contents = lintutil.file_contents(abs_f)

        for error in _lint_js_content(abs_f, file_contents):
            yield error
Exemplo n.º 2
0
def lint_javascript_in_html(files_to_lint):
    """Run eslint on javascript content inside html files.

    We want to make sure that the js in our html files has no unused
    variables/etc, so we can do better dependency analysis on it.
    """
    files = lintutil.filter(files_to_lint, prefix='templates/', suffix='.html')

    lint_inputs = []      # list of filename/contents pairs

    for f in files:
        contents_of_f = lintutil.file_contents(f)

        # extract_js_from_html actually can return several versions
        # of the js contents, each with a different branch of an
        # if/else commented out.  (So for input like
        #  <script>var x = { {%if c%}y: 4{%else%}y: 5{%endif%} };</script>
        # we'd see both 'var x = { y: 4 };' and 'var x = { y: 5 }')
        # We lint all such strings and combine the output.
        js_contents_iter = js_in_html.extract_js_from_html(contents_of_f,
                                                           "jinja2",
                                                           file_name=f)

        try:
            for js_contents in js_contents_iter:
                lint_inputs.append((f, js_contents))
        except Exception, why:
            yield (f, 1, 'HTML parse error: %s' % why)
            continue
Exemplo n.º 3
0
def lint_js_files_are_translated(files_to_lint):
    """Verify that nltext in the js files are marked for translation.

    See docstring of: _lint_js_content

    Returns:
       List of triples: (filename, lineno, error message)
    """
    # Make sure jsx files are compiled first, then we will lint the resulting
    # js.
    kake.make.build_many([('genfiles/compiled_jsx/en/%s.js' %
                           ka_root.relpath(f), {})
                          for f in files_to_lint if f.endswith('.jsx')])

    files_to_lint = lintutil.filter(files_to_lint, suffix=('.js', '.jsx'))
    for f in files_to_lint:
        abs_f = f
        f = ka_root.relpath(f)

        # Exclude files that we don't need to translate: we don't care
        # if those files are 'properly' marked up or not.
        if intl.english_only.should_not_translate_file(f):
            continue

        if f.endswith(".jsx"):
            abs_f = "%s/genfiles/compiled_jsx/en/%s.js" % (ka_root.root, f)
            f = ka_root.relpath(abs_f)

        file_contents = lintutil.file_contents(abs_f)

        for error in _lint_js_content(abs_f, file_contents):
            yield error
Exemplo n.º 4
0
def lint_duplicate_test_functions(files_to_lint):
    """Enforce that no two test functions in the same class have the same name.

    This prevents one test from not running because another test overwrites it.
    """
    files = lintutil.filter(files_to_lint, suffix='_test.py')

    for filename in files:
        contents = lintutil.file_contents(filename)

        functions = _matches_with_line_numbers(_TEST_FN_RE, contents)
        classes = _matches_with_line_numbers(_CLASS_NAME_RE, contents)

        lines = collections.defaultdict(list)

        for fname, line in functions:
            cls = _find_class_at_line(classes, line)
            if cls is None:
                continue
            qualified_fname = cls + '.' + fname
            lines[qualified_fname] += [line]

        for fname, dup_lines in lines.iteritems():
            if len(dup_lines) > 1:
                if any(
                        lintutil.has_nolint(filename, line)
                        for line in dup_lines):
                    continue

                yield (filename, max(dup_lines),
                       'Duplicate test function name %(name)s'
                       ' at lines %(lines)s' % {
                           'name': fname,
                           'lines': ', '.join(str(l) for l in dup_lines)
                       })
Exemplo n.º 5
0
def lint_duplicate_test_functions(files_to_lint):
    """Enforce that no two test functions in the same class have the same name.

    This prevents one test from not running because another test overwrites it.
    """
    files = lintutil.filter(files_to_lint, suffix='_test.py')

    for filename in files:
        contents = lintutil.file_contents(filename)

        functions = _matches_with_line_numbers(_TEST_FN_RE, contents)
        classes = _matches_with_line_numbers(_CLASS_NAME_RE, contents)

        lines = collections.defaultdict(list)

        for fname, line in functions:
            cls = _find_class_at_line(classes, line)
            if cls is None:
                continue
            qualified_fname = cls + '.' + fname
            lines[qualified_fname] += [line]

        for fname, dup_lines in lines.iteritems():
            if len(dup_lines) > 1:
                if any(lintutil.has_nolint(filename, line)
                       for line in dup_lines):
                    continue

                yield (filename, max(dup_lines),
                       'Duplicate test function name %(name)s'
                       ' at lines %(lines)s' % {
                           'name': fname,
                           'lines': ', '.join(str(l) for l in dup_lines)
                       })
Exemplo n.º 6
0
def lint_handlebars_debugger_statement(files_to_lint):
    """Make sure we don't have any stray {{debugger}} statements."""
    files = lintutil.filter(files_to_lint, suffix='.handlebars')
    for (fname, linenum, _) in lintutil.find_lines(
            files, _HANDLEBARS_DEBUGGER_RE):
        yield (fname, linenum,
               'All {{debugger}} statements should be removed.')
Exemplo n.º 7
0
def lint_jsx_fixture_files(files_to_lint):
    """Ensure that fixture files have an associated fixtures_test.js file."""
    # We only want fixture files
    fixture_files_to_lint = lintutil.filter(files_to_lint,
        suffix=('.jsx.fixture.js', '.jsx.fixture.jsx'))

    for fixture_file in fixture_files_to_lint:
        path, fixture_name = os.path.split(fixture_file)

        # We need to potentially walk up the tree to find the "-package"
        # directory which will old the fixtures_test.js file.
        while path and not path.endswith("-package"):
            path = os.path.dirname(path)
        fixture_test_file = os.path.join(path, "fixtures_test.js")

        # Check to see if the fixtures_test.js file is completely missing
        if not os.path.isfile(fixture_test_file):
            yield (fixture_file, 1,
                   "Fixture files must have an associated fixtures_test.js "
                   "file. Run tools/regen_fixture_tests.sh to create it.")
        else:
            # Otherwise, it exists, so make sure that the fixture file is
            # referenced inside the fixtures_test.js file.
            search_re = re.compile(r'/%s"' % re.escape(fixture_name))
            if not lintutil.line_number(fixture_test_file, search_re, 0):
                yield (fixture_test_file, 1,
                       "Fixture test is missing %s. Run "
                       "tools/regen_fixture_tests.sh to update it." %
                           fixture_name)
Exemplo n.º 8
0
def lint_every_rendered_component_has_a_fixture(files_to_lint):
    """Check that every component we render has an associated fixture file.

    In order to test that a particular react component can be
    server-side rendered, we need to actually try to render it with a
    particular value for props.  This is what component.fixture.js
    files are for.  We just make sure the people write them!

    For now, we allow the fixture file to be empty (just `[]`).  Later
    we may insist on actually useful fixtures.
    """
    files_to_lint = lintutil.filter(files_to_lint, suffix='.html')

    for f in files_to_lint:
        contents_of_f = lintutil.file_contents(f)
        for m in RENDER_REACT_RE.finditer(contents_of_f):
            component_file = m.group(1)
            # To be server-side renderable, the fixture file has to be
            # a javascript file, not jsx or something else.
            fixture_file = component_file + '.fixture.js'
            if not os.path.exists(ka_root.join(fixture_file)):
                linenum = contents_of_f.count('\n', 0, m.start()) + 1
                yield (f, linenum,
                       '%s must have an associated fixture file %s'
                       % (component_file, fixture_file))
Exemplo n.º 9
0
def lint_package_imports(files_to_lint):
    """Find cases where we import a package (directory) rather than a module.

    For instance, "import emails".  That is not a module, it's a
    directory.

    Sometimes importing a directory is correct, for third-party
    modules with non-trivial __init__.py's.  But it's never right for
    our code.
    """
    files_to_lint = lintutil.filter(files_to_lint, suffix='.py')

    for f in files_to_lint:
        import_lines = _get_import_lines(f)
        for import_line in import_lines:
            if import_line.module.startswith(('__future__', 'third_party')):
                # The rules are different for third-party code, and
                # __future__'s aren't even real modules.
                continue
            if _is_a_first_party_package(import_line.module):
                contents = lintutil.file_contents(f)
                corrections = re.findall(r'%s\.[\w_]+' % import_line.name,
                                         contents)
                yield (
                    f, import_line.lineno,
                    "Do not import the whole directory '%s'; "
                    "import the modules inside it, possibly: %s" %
                    (import_line.module, ', '.join(sorted(set(corrections)))))
Exemplo n.º 10
0
def lint_every_rendered_component_has_a_fixture(files_to_lint):
    """Check that every component we render has an associated fixture file.

    In order to test that a particular react component can be
    server-side rendered, we need to actually try to render it with a
    particular value for props.  This is what component.fixture.js
    files are for.  We just make sure the people write them!

    For now, we allow the fixture file to be empty (just `[]`).  Later
    we may insist on actually useful fixtures.
    """
    files_to_lint = lintutil.filter(files_to_lint, suffix='.html')

    for f in files_to_lint:
        contents_of_f = lintutil.file_contents(f)
        for m in RENDER_REACT_RE.finditer(contents_of_f):
            component_file = m.group(1)
            # To be server-side renderable, the fixture file has to be
            # a javascript file, not jsx or something else.
            fixture_file = component_file + '.fixture.js'
            if not os.path.exists(ka_root.join(fixture_file)):
                linenum = contents_of_f.count('\n', 0, m.start()) + 1
                yield (f, linenum,
                       '%s must have an associated fixture file %s' %
                       (component_file, fixture_file))
def lint_banned_file_types(files_to_lint):
    """Find all the bad files using banned extensions and complain."""
    bad_files = lintutil.filter(files_to_lint,
                                suffix=_BAD_EXTENSIONS,
                                exclude_substrings=_EXISTING_FILE_WHITELIST)
    for bad_file in bad_files:
        yield (bad_file, 1, "Use React and Aphrodite instead.")
Exemplo n.º 12
0
def lint_jsx_fixture_files(files_to_lint):
    """Ensure that fixture files have an associated fixtures_test.js file."""
    # We only want fixture files
    fixture_files_to_lint = lintutil.filter(files_to_lint,
                                            suffix=('.jsx.fixture.js',
                                                    '.jsx.fixture.jsx'))

    for fixture_file in fixture_files_to_lint:
        path, fixture_name = os.path.split(fixture_file)

        # We need to potentially walk up the tree to find the "-package"
        # directory which will old the fixtures_test.js file.
        while path and not path.endswith("-package"):
            path = os.path.dirname(path)
        fixture_test_file = os.path.join(path, "fixtures_test.js")

        # Check to see if the fixtures_test.js file is completely missing
        if not os.path.isfile(fixture_test_file):
            yield (fixture_file, 1,
                   "Fixture files must have an associated fixtures_test.js "
                   "file. Run tools/regen_fixture_tests.sh to create it.")
        else:
            # Otherwise, it exists, so make sure that the fixture file is
            # referenced inside the fixtures_test.js file.
            search_re = re.compile(r'/%s"' % re.escape(fixture_name))
            if not lintutil.line_number(fixture_test_file, search_re, 0):
                yield (fixture_test_file, 1, "Fixture test is missing %s. Run "
                       "tools/regen_fixture_tests.sh to update it." %
                       fixture_name)
Exemplo n.º 13
0
def lint_no_templatetags_calls_in_python(files_to_lint):
    """Enforce that python code does not call code in templatetags/filters.

    templatetags.py and templatefilters.py are intended to hold code
    that emits to jinja2 templates.  For that reason, we allow things
    there that are hard to use correctly in python logic, such as
    jinja2.Markup() objects.  To make sure we keep the distinction
    between jinja2 objects and python objects separate, we check that
    python code never calls stuff in templatetags/templatefilters.

    Such dual-use code can, if needed, be moved to a separate file,
    where it can be called by both 'normal' python code and functions
    in templatetags.  For instance, templatefilters.py:static_url()
    just calls url_util.static_url().
    """
    # Only need to lint python files, but not test files.  Also:
    # templatetags and templatefilters routines can call each other,
    # config_jinja can reference them to add them to the jinja2 config.
    exclude = ('_test.py', '/templatetags.py', '/templatefilters.py',
               '/config_jinja.py')
    files = lintutil.filter(files_to_lint, suffix='.py',
                            exclude_substrings=exclude)
    for (fname, linenum, line) in lintutil.find_lines(files,
                                                      _IMPORT_TEMPLATETAGS_RE):
        m = _IMPORT_TEMPLATETAGS_RE.search(line)
        module = m.group(1) or m.group(2) or m.group(3)
        yield (fname, linenum,
               '%s is for jinja2; do not call from Python code' % module)
Exemplo n.º 14
0
def lint_non_literal_i18n_do_not_translate(files_to_lint):
    """Find where we mark html as not needing translation but its non-literal

    We require anything we mark with i18n_do_not_translate to be only a string
    literal. i18n_do_not_translate marks the string as "safe" (that is jinja2
    won't autoescape it) so we need to be extra careful we don't create a
    potential XSS attack vector if we end up marking some variable as
    safe.

    Unless that line is marked with {{# @Nolint #}}, we flag it as a lint
    error.

    Returns:
       List of triples: (filename, lineno, error message)
    """
    # Limit files-to-lint to files under templates/ that are html or txt.
    files = lintutil.filter(files_to_lint,
                            prefix='templates/', suffix=('.html', '.txt'))

    for filename in files:
        contents = lintutil.file_contents(filename)
        for fn_match in _J2_FUNCTION_RE.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue

            for m in _BAD_DO_NOT_TRANSLATE_RE.finditer(fn_match.group(0)):
                linenum = 1 + contents.count('\n', 0,
                                             fn_match.start() + m.start(1))
                yield (filename, linenum,
                       '%s contains something that is not just a '
                       'literal string. Only literal strings can be '
                       'inside i18n_do_not_translate.' % m.group(1))
Exemplo n.º 15
0
def lint_absolute_import(files_to_lint):
    """Make sure we use absolute imports everywhere.

    Suppose you have code like a top-level util.py and also
    email/util.py, and you do "import util".  If you're doing that
    from email/foo.py, then it will import email/util.py instead of
    the top-level util.py.  Using absolute imports everywhere avoids
    the risk of that problem.  It also makes it more obvious to those
    looking at the code, exactly what's being imported.
    """
    files_to_lint = lintutil.filter(files_to_lint, suffix='.py')

    for f in files_to_lint:
        import_lines = _get_import_lines(f)

        if not import_lines:
            # No imports?  Then no need for an absolute-import directive.
            continue

        if any(i.module == '__future__.absolute_import' for i in import_lines):
            # Has an absolute-import, we're happy.
            continue

        contents = lintutil.file_contents(f)
        if contents.startswith('#!'):
            # If the file can be run as a script, then absolute-import
            # doesn't always work since we don't automatically have
            # webapp-root in the pythonpath for scripts.  So we don't
            # require absolute-import in that case.
            continue

        # __future__ imports come first, so the best line number to
        # report for the error is the first import line.
        yield (f, import_lines[0].lineno,
               'Modules must use: from __future__ import absolute_import')
Exemplo n.º 16
0
def lint_non_literal_i18n_in_javascript(files_to_lint):
    """Complain about uses of i18n._() on something other than a string.

    i18n._(variable) is dangerous -- you don't know if the variable
    has been translated or not.
    """
    files_to_lint = lintutil.filter(files_to_lint,
                                    suffix=('.js', '.jsx'),
                                    exclude_substrings=('/i18n.js',
                                                        '/i18n_test.js'))

    # This regexp pattern captures a string, possibly concatenated with +'s.
    js_str = (r'(?:' + r'"(?:\\.|[^"])*"|' + r"'(?:\\.|[^'])*'|" +
              r'`(?:\\.|[^`])*`' + ')')
    js_concat_str = '\s*%s(?:\s*\+\s*%s)*\s*' % (js_str, js_str)

    gettext_occurrences = re.compile(r'\b(i18n._|i18n.ngettext)\(')
    valid_gettext_occurrences = re.compile(
        r'\bi18n._\(%(str)s[,)]|\bi18n.ngettext\(%(str)s,\s*%(str)s[,)]' %
        {'str': js_concat_str})

    for f in files_to_lint:
        contents = lintutil.file_contents(f)
        all_occurrences = {
            m.start(): m
            for m in gettext_occurrences.finditer(contents)
        }
        valid_occurrences = {
            m.start(): m
            for m in valid_gettext_occurrences.finditer(contents)
        }

        for (startpos, m) in all_occurrences.iteritems():
            i18n_fn = m.group(1)
            msg = None  # set to non-None if there's a problem.

            if startpos not in valid_occurrences:
                msg = ('%s must have string literals as arguments, '
                       'with no variables or templates' % i18n_fn)
            else:
                # Then we're ok with this!  *Unless* it's a template string
                # with $(...) inside it, then we're not ok.
                m2 = valid_occurrences[startpos]
                if m2.group().count(r'${') > m2.group().count(r'\${'):
                    msg = ('You must use %' + '(...)s with template strings '
                           'inside %s, not ${...}' % i18n_fn)

            if msg:
                start_lineno = 1 + contents.count('\n', 0, startpos)
                # Doing a real regexp to find the end of this function call
                # is tough, we just do something simple and pray.
                end_paren = contents.find(')', startpos)
                if end_paren == -1:
                    end_paren = len(contents)
                end_lineno = 1 + contents.count('\n', 0, end_paren)
                if any(
                        lintutil.has_nolint(f, lineno)
                        for lineno in xrange(start_lineno, end_lineno + 1)):
                    continue
                yield (f, start_lineno, msg)
Exemplo n.º 17
0
def lint_non_literal_i18n_in_python(files_to_lint):
    """Complain about uses of i18n._() on something other than a string.

    i18n._(variable) is dangerous -- you don't know if the variable
    has been translated or not.  Sometimes it's ok, but usually it's a
    mistake, and a better solution is to pass in translated_variable
    instead.  (The OK cases can be marked with @Nolint.)
    """
    current_state = None  # not currently in a gettext context
    gettext_linenum = None  # linenum of current gettext call
    has_nolint = False  # any line of the i18n._ may have @nolint

    # Only need to lint python files, but not test files.
    files = lintutil.filter(files_to_lint,
                            suffix='.py',
                            exclude_substrings=['_test.py'])

    for (fname, ttype, token, (linenum, _), _,
         line) in (lintutil.python_tokens(files)):
        # Don't lint *definitions* of these methods.
        if line.strip().startswith('def '):
            continue

        # The state machine may transition on either the type
        # of the token, or its literal value.
        if token in _GETTEXT_STATE_MACHINE[current_state]:
            current_state = token
            gettext_linenum = gettext_linenum or linenum
            has_nolint = has_nolint or '@Nolint' in line
        elif ttype in _GETTEXT_STATE_MACHINE[current_state]:
            current_state = ttype
            gettext_linenum = gettext_linenum or linenum
            has_nolint = has_nolint or '@Nolint' in line
        elif current_state is None:
            # We weren't in gettext before, and we're still not.
            pass
        else:
            # We're in gettext, but can't transition: we're bad.
            # Give ourselves *one more* chance for nolint.
            has_nolint = has_nolint or '@Nolint' in line
            if current_state in _START_TOKENS:
                # BUT: If we last saw _ or ngettext and can't
                # transition because we're not a '(', that
                # means it's not a function call, so we can
                # ignore it.  e.g. '(f, _) = file_and_line()'.
                pass
            elif not has_nolint:
                yield (fname, gettext_linenum,
                       'gettext-like calls should only have '
                       'literal strings as their arguments')
            current_state = None
            gettext_linenum = None
            has_nolint = False

        # If we've happily ended a gettext call, clear the state.
        if _GETTEXT_STATE_MACHINE[current_state] is None:
            current_state = None
            gettext_linenum = None
            has_nolint = False
Exemplo n.º 18
0
def lint_non_literal_i18n_in_python(files_to_lint):
    """Complain about uses of i18n._() on something other than a string.

    i18n._(variable) is dangerous -- you don't know if the variable
    has been translated or not.  Sometimes it's ok, but usually it's a
    mistake, and a better solution is to pass in translated_variable
    instead.  (The OK cases can be marked with @Nolint.)
    """
    current_state = None    # not currently in a gettext context
    gettext_linenum = None  # linenum of current gettext call
    has_nolint = False      # any line of the i18n._ may have @nolint

    # Only need to lint python files, but not test files.
    files = lintutil.filter(files_to_lint, suffix='.py',
                            exclude_substrings=['_test.py'])

    for (fname, ttype, token, (linenum, _), _, line) in (
            lintutil.python_tokens(files)):
        # Don't lint *definitions* of these methods.
        if line.strip().startswith('def '):
            continue

        # The state machine may transition on either the type
        # of the token, or its literal value.
        if token in _GETTEXT_STATE_MACHINE[current_state]:
            current_state = token
            gettext_linenum = gettext_linenum or linenum
            has_nolint = has_nolint or '@Nolint' in line
        elif ttype in _GETTEXT_STATE_MACHINE[current_state]:
            current_state = ttype
            gettext_linenum = gettext_linenum or linenum
            has_nolint = has_nolint or '@Nolint' in line
        elif current_state is None:
            # We weren't in gettext before, and we're still not.
            pass
        else:
            # We're in gettext, but can't transition: we're bad.
            # Give ourselves *one more* chance for nolint.
            has_nolint = has_nolint or '@Nolint' in line
            if current_state in _START_TOKENS:
                # BUT: If we last saw _ or ngettext and can't
                # transition because we're not a '(', that
                # means it's not a function call, so we can
                # ignore it.  e.g. '(f, _) = file_and_line()'.
                pass
            elif not has_nolint:
                yield (fname, gettext_linenum,
                       'gettext-like calls should only have '
                       'literal strings as their arguments')
            current_state = None
            gettext_linenum = None
            has_nolint = False

        # If we've happily ended a gettext call, clear the state.
        if _GETTEXT_STATE_MACHINE[current_state] is None:
            current_state = None
            gettext_linenum = None
            has_nolint = False
Exemplo n.º 19
0
def lint_non_literal_i18n_in_javascript(files_to_lint):
    """Complain about uses of i18n._() on something other than a string.

    i18n._(variable) is dangerous -- you don't know if the variable
    has been translated or not.
    """
    files_to_lint = lintutil.filter(
        files_to_lint, suffix=('.js', '.jsx'),
        exclude_substrings=('/i18n.js', '/i18n_test.js'))

    # This regexp pattern captures a string, possibly concatenated with +'s.
    js_str = (r'(?:' +
              r'"(?:\\.|[^"])*"|' +
              r"'(?:\\.|[^'])*'|" +
              r'`(?:\\.|[^`])*`' +
              ')')
    js_concat_str = '\s*%s(?:\s*\+\s*%s)*\s*' % (js_str, js_str)

    gettext_occurrences = re.compile(r'\b(i18n._|i18n.ngettext)\(')
    valid_gettext_occurrences = re.compile(
        r'\bi18n._\(%(str)s[,)]|\bi18n.ngettext\(%(str)s,\s*%(str)s[,)]'
        % {'str': js_concat_str})

    for f in files_to_lint:
        contents = lintutil.file_contents(f)
        all_occurrences = {
            m.start(): m for m in gettext_occurrences.finditer(contents)}
        valid_occurrences = {
            m.start(): m for m in valid_gettext_occurrences.finditer(contents)}

        for (startpos, m) in all_occurrences.iteritems():
            i18n_fn = m.group(1)
            msg = None      # set to non-None if there's a problem.

            if startpos not in valid_occurrences:
                msg = ('%s must have string literals as arguments, '
                       'with no variables or templates' % i18n_fn)
            else:
                # Then we're ok with this!  *Unless* it's a template string
                # with $(...) inside it, then we're not ok.
                m2 = valid_occurrences[startpos]
                if m2.group().count(r'${') > m2.group().count(r'\${'):
                    msg = ('You must use %' + '(...)s with template strings '
                           'inside %s, not ${...}' % i18n_fn)

            if msg:
                start_lineno = 1 + contents.count('\n', 0, startpos)
                # Doing a real regexp to find the end of this function call
                # is tough, we just do something simple and pray.
                end_paren = contents.find(')', startpos)
                if end_paren == -1:
                    end_paren = len(contents)
                end_lineno = 1 + contents.count('\n', 0, end_paren)
                if any(lintutil.has_nolint(f, lineno)
                       for lineno in xrange(start_lineno, end_lineno + 1)):
                    continue
                yield (f, start_lineno, msg)
Exemplo n.º 20
0
def lint_all_fixtures_match_components(files_to_lint):
    files = lintutil.filter(files_to_lint,
                            prefix='javascript/', suffix='.fixture.js')
    for f in files:
        component_path = f[:-len(".fixture.js")]
        if not os.path.isfile(component_path):
            yield(f, 1,
                  "Expected to find a React component at '{}'. If you just "
                  "moved that component, be sure to move "
                  "this fixture too.".format(component_path))
Exemplo n.º 21
0
def lint_all_fixtures_match_components(files_to_lint):
    files = lintutil.filter(files_to_lint,
                            prefix='javascript/',
                            suffix='.fixture.js')
    for f in files:
        component_path = f[:-len(".fixture.js")]
        if not os.path.isfile(component_path):
            yield (f, 1,
                   "Expected to find a React component at '{}'. If you just "
                   "moved that component, be sure to move "
                   "this fixture too.".format(component_path))
Exemplo n.º 22
0
def lint_yamls_are_parseable(files_to_lint):
    """Enforce that any yamls to be uploaded to app engine are parseable."""
    files_to_lint = lintutil.filter(files_to_lint, suffix='.yaml')
    for filename in files_to_lint:
        with open(filename) as f:
            try:
                yaml.safe_load(f)
            except yaml.parser.ParserError as e:
                yield (filename,
                       e.problem_mark.line + 1,  # yaml.Mark is 0-indexed
                       'Error parsing yaml: %s %s.' % (e.problem, e.context))
Exemplo n.º 23
0
def lint_yamls_are_parseable(files_to_lint):
    """Enforce that any yamls to be uploaded to app engine are parseable."""
    files_to_lint = lintutil.filter(files_to_lint, suffix='.yaml')
    for filename in files_to_lint:
        with open(filename) as f:
            try:
                yaml.safe_load(f)
            except yaml.parser.ParserError as e:
                yield (
                    filename,
                    e.problem_mark.line + 1,  # yaml.Mark is 0-indexed
                    'Error parsing yaml: %s %s.' % (e.problem, e.context))
Exemplo n.º 24
0
def lint_fixture_files(files_to_lint):
    # We only care about "plain" jsx files
    # We also want to ignore test files
    jsx_files_to_lint = lintutil.filter(files_to_lint, suffix=('.jsx'),
        exclude_substrings=_JSX_NO_FIXTURE_WHITELIST +
            ['testfiles', 'testutils', '.fixture.jsx'])

    for jsx_file in jsx_files_to_lint:
        if (not os.path.isfile(jsx_file + ".fixture.js") and
                not os.path.isfile(jsx_file + ".fixture.jsx")):
            yield (jsx_file, 1,
                   ".jsx files must have an associated "
                   ".jsx.fixture.js file.")
Exemplo n.º 25
0
def lint_comma(files_to_lint):
    """Find imports that use a comma to import multiple things on one line.

    KA style is we only have one import per line, so we just complain.
    """
    files_to_lint = lintutil.filter(files_to_lint, suffix='.py')

    for f in files_to_lint:
        import_lines = _get_import_lines(f)
        for import_line in import_lines:
            if import_line.has_comma:
                yield (f, import_line.lineno,
                       "Use one import per line, rather than a comma.")
Exemplo n.º 26
0
def lint_fixture_files(files_to_lint):
    # We only care about "plain" jsx files
    # We also want to ignore test files
    jsx_files_to_lint = lintutil.filter(files_to_lint, suffix=('.jsx'),
        exclude_substrings=_JSX_NO_FIXTURE_WHITELIST +
            ['testfiles', 'testutils', '.fixture.jsx'])

    for jsx_file in jsx_files_to_lint:
        if (not os.path.isfile(jsx_file + ".fixture.js") and
                not os.path.isfile(jsx_file + ".fixture.jsx")):
            yield (jsx_file, 1,
                   ".jsx files must have an associated "
                   ".jsx.fixture.js file.")
Exemplo n.º 27
0
def lint_js_test_files(files_to_lint):
    # We only want .js files that are inside the javascript directory
    # We don't care if tests or fixtures have tests
    js_files_to_lint = lintutil.filter(files_to_lint,
        prefix=('javascript/'),
        suffix=('.js'),
        exclude_substrings=['.jsx.fixture.js', '_test.js'] +
            _JS_NO_TEST_WHITELIST)

    for js_file in js_files_to_lint:
        test_file = js_file.replace(".js", "_test.js")
        if not os.path.isfile(test_file):
            yield (js_file, 1,
                   ".js files must have an associated _test.js file.")
Exemplo n.º 28
0
def lint_js_test_files(files_to_lint):
    # We only want .js files that are inside the javascript directory
    # We don't care if tests or fixtures have tests
    js_files_to_lint = lintutil.filter(files_to_lint,
        prefix=('javascript/'),
        suffix=('.js'),
        exclude_substrings=['.jsx.fixture.js', '_test.js'] +
            _JS_NO_TEST_WHITELIST)

    for js_file in js_files_to_lint:
        test_file = js_file.replace(".js", "_test.js")
        if not os.path.isfile(test_file):
            yield (js_file, 1,
                   ".js files must have an associated _test.js file.")
Exemplo n.º 29
0
def lint_gae_specific_code(files_to_lint):
    """Enforce that we do not add new code that will not work on flex.

    This lint check is only for the 'main' webapp service; other services
    (in services/) may be running on appengine classic, and can use this
    functionality.  In fact, sometimes we separated them out into services
    for just that reason!
    """
    files_to_lint = lintutil.filter(files_to_lint,
                                    suffix='.py',
                                    exclude_substrings=['/services/'])
    for (bad_module_re, bad_module, replacement) in _BAD_MODULES_RES:
        for (f, linenum, _) in lintutil.find_lines(files_to_lint,
                                                   bad_module_re):
            yield (f, linenum, 'Do not import %s -- use %s instead.' %
                   (bad_module, replacement))
Exemplo n.º 30
0
def lint_gae_specific_code(files_to_lint):
    """Enforce that we do not add new code that will not work on flex.

    This lint check is only for the 'main' webapp service; other services
    (in services/) may be running on appengine classic, and can use this
    functionality.  In fact, sometimes we separated them out into services
    for just that reason!
    """
    files_to_lint = lintutil.filter(
        files_to_lint, suffix='.py', exclude_substrings=['/services/'])
    for (bad_module_re, bad_module, replacement) in _BAD_MODULES_RES:
        for (f, linenum, _) in lintutil.find_lines(files_to_lint,
                                                   bad_module_re):
            yield (f, linenum,
                   'Do not import %s -- use %s instead.'
                   % (bad_module, replacement))
Exemplo n.º 31
0
def lint_no_backslashes(files_to_lint):
    """Complain if any import lines use backslashes for line continuation.

    While our style guide discourages end-of-line backslashes for line
    continuation, it's not normally something we'd lint on.  But I do
    in this case because the regexp for other import-linters will get
    confused in the presense of backslashes, and it's easy enough to
    use parens instead.
    """
    backslash_re = re.compile(r'^\s*(from|import)\b.*\\$', re.MULTILINE)

    files_to_lint = lintutil.filter(files_to_lint, suffix='.py')

    for (filename, linenum, _) in lintutil.find_lines(files_to_lint,
                                                      backslash_re):
        yield (filename, linenum,
               "Use parens instead of backslash for line-continuation")
Exemplo n.º 32
0
def lint_no_user_property(files_to_lint):
    """Enforce that nobody uses UserProperty.

    ...unless marked as an explicitly approved legacy usage via @Nolint.
    """
    files_to_lint = lintutil.filter(files_to_lint, suffix='.py')
    for filename in files_to_lint:
        contents = lintutil.file_contents(filename)
        for fn_match in _USERPROPERTY_RE.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue

            linenum = 1 + contents.count('\n', 0, fn_match.start())
            yield (filename, linenum,      # filename and linenum
                   "Do not use UserProperty, it is not safe. Use UserData's "
                   "key as its foreign key, instead.")
Exemplo n.º 33
0
def lint_symbol_imports(files_to_lint):
    """Find cases where we import a symbol rather than a module.

    For instance, "from main import application".  That is not a module,
    it's a variable defined within a module.
    """
    files_to_lint = lintutil.filter(files_to_lint, suffix='.py')

    for f in files_to_lint:
        import_lines = _get_import_lines(f)
        for import_line in import_lines:
            # You can only import a symbol via the 'from ... import' syntax
            if not import_line.has_from:
                continue
            if import_line.module.startswith('__future__'):  # not a real dir
                continue

            # TODO(csilvers): our style guide has an exception
            # allowing the import of individual symbols from
            # third-party packages which document this as a best
            # practice.  Add a special-case to ignore those, here.
            # (Or amend the style guide to remove the exception :-) )

            if not _is_a_module_or_package(import_line.module):
                # While we may not be a module or a package, the thing
                # containing us should definitely be.  If it's not, or
                # if it's a package but an empty one, then we're
                # importing something that doesn't (now) exist,
                # probably an auto-generated package.  We just ignore
                # it; we can't say anything useful about it.
                module_parent = import_line.module.rsplit('.', 1)[0]
                module_parent_as_package = ka_root.join(
                    module_parent.replace('.', os.sep), '__init__.py')
                if (not _is_a_module_or_package(module_parent) or
                    (os.path.exists(module_parent_as_package)
                     and os.stat(module_parent_as_package).st_size == 0)):
                    continue

                actual_module = import_line.module.rsplit('.', 1)[0]
                yield (f, import_line.lineno,
                       "Import the whole module %s, not individual symbols "
                       "inside it" % actual_module)
Exemplo n.º 34
0
def lint_iife_in_html_javascript(files_to_lint):
    """Make sure js in <script> tags doesn't leak globals by using an IIFE.

    IIFE is when you surround js code in '(function() { ... })();'.
    This prevents any variable declarations from polluting the global
    scope.  We require all <script> tags to have an IIFE if they
    declare a variable.
    """
    files = lintutil.filter(files_to_lint, prefix='templates/', suffix='.html')

    # For determining if we need an IIFE, we only care about 'var'
    # statements.
    var_re = re.compile(r'\bvar\b')

    for f in files:
        contents_of_f = lintutil.file_contents(f)

        # extract_js_from_html actually can return several versions
        # of the js contents, each with a different branch of an
        # if/else commented out.  (So for input like
        #  <script>var x = { {%if c%}y: 4{%else%}y: 5{%endif%} };</script>
        # we'd see both 'var x = { y: 4 };' and 'var x = { y: 5 }')
        # We lint all such strings and combine the output.
        js_contents_iter = js_in_html.extract_js_from_html(contents_of_f,
                                                           "jinja2",
                                                           keep_re=var_re,
                                                           file_name=f)
        try:
            for js_contents in js_contents_iter:
                if (not _IIFE_RE.match(js_contents) and
                        var_re.search(js_contents)):
                    # Our script is the first non-blank line in the
                    # extracted output.
                    lineno = len(js_contents) - len(js_contents.lstrip('\n'))
                    if not lintutil.has_nolint(f, lineno + 1):
                        yield (f, lineno + 1,
                               "Must wrap this script's contents in "
                               "'(function() { ... })();' (IIFE) to "
                               "avoid leaking vars to global state")
        except Exception, why:
            yield (f, 1, 'HTML parse error: %s' % why)
            continue
Exemplo n.º 35
0
def lint_no_jinja2_markup_in_python(files_to_lint):
    """Enforce that python code does not call jinja2.Markup().

    jinja2.Markup(s) says that jinja2 should not auto-escape s.  This
    is certainly useful to be able to do, but we shouldn't be doing it
    in python code (unless we're writing a jinja2 filter or tag)!
    Instead, the jinja2 template itself should be using the |safe
    modifier.  We enforce that this happens.

    The reason is that Markup() objects look like unicode but have
    these magical properties that can explode and cause trouble
    unexpectedly -- usually html-escaping things that are interpolated
    into it, without us wanting to.  As one example of the trouble it
    can cause, there's this code in api/jsonify.py:

        if isinstance(obj, jinja2.Markup):
            # Use the plain str representation of jinja2 Markup for jsonify.
            # Otherwise jinja2's Markup object will try to improperly encode
            # the double quotes that jsonify wraps around all strings.
            return unicode(obj)

    Needing to sprinkle special-case code like that everywhere is
    ugly; figuring out where such code needs to be sprinkled is a
    nightmare.

    As always, code that needs to override this check can use @Nolint.
    """
    # Only need to lint python files, but not test files.
    # And Markup() is expected in template-tags and template-filters.
    exclude = ('_test.py', '/templatetags.py', '/templatefilters.py')
    files = lintutil.filter(files_to_lint, suffix='.py',
                            exclude_substrings=exclude)

    for tokens in lintutil.find_token_streams(files, _JINJA2_MARKUP_TOKENS):
        yield (tokens[0][0], tokens[0][3][0],      # filename and linenum
               'Do not call jinja2.Markup() from code'
               ' (except in templatetags/filters).'
               ' Use |safe in templates instead.')

    for (fname, linenum, _) in lintutil.find_lines(files, _IMPORT_MARKUP_RE):
        yield (fname, linenum,
               'Import full modules, not classes from modules (Markup)')
Exemplo n.º 36
0
def lint_no_user_property(files_to_lint):
    """Enforce that nobody uses UserProperty.

    ...unless marked as an explicitly approved legacy usage via @Nolint.
    """
    files_to_lint = lintutil.filter(files_to_lint, suffix='.py')
    for filename in files_to_lint:
        contents = lintutil.file_contents(filename)
        for fn_match in _USERPROPERTY_RE.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue

            linenum = 1 + contents.count('\n', 0, fn_match.start())
            yield (
                filename,
                linenum,  # filename and linenum
                "Do not use UserProperty, it is not safe. Use UserData's "
                "key as its foreign key, instead.")
Exemplo n.º 37
0
def lint_redundant_imports(files_to_lint):
    """You don't need to do both `import a.b` and `from a import b`!"""
    files_to_lint = lintutil.filter(files_to_lint, suffix='.py')

    for f in files_to_lint:
        import_lines = _get_import_lines(f)
        import_modules = {}  # map from module to import_line

        for import_line in import_lines:
            other_import = import_modules.get(import_line.module, None)
            if other_import:
                yield (f, import_line.lineno,
                       "Already imported on line %s" % other_import.lineno)
            else:
                # It's common to import the same module multiple times
                # if it's within functions (the import only exists in
                # a limited scope) or try/except blocks, so we don't
                # complain if something shadows another non-top-level
                # import.
                if import_line.colno == 0:
                    import_modules[import_line.module] = import_line
Exemplo n.º 38
0
def lint_circular_imports(files_to_lint):
    """Report on all circular imports found in files_to_lint.

    A circular import is when A imports B imports C imports A.  We
    focus only on *simple* cycles, where each node occurs only once in
    a cycle (so a figure-8 would be two cycles, one for the top circle
    and one for the bottom, not a single, more complicated cycle).
    The rule is that we report all cycles that include any files in
    files_to_lint.  (For reproducibility, we always print the cycle
    starting with the file in the cycle that comes first
    alphabetically.)

    It's very possible for a single file to have multiple circular
    imports.  We report them all.

    """
    files_to_lint = lintutil.filter(files_to_lint, suffix='.py')
    module_to_file = {
        ka_root.relpath(os.path.splitext(f)[0]).replace(os.sep, '.'): f
        for f in files_to_lint
    }

    saw_a_cycle = False
    for cycle in sorted(_find_all_cycles(module_to_file.keys())):
        saw_a_cycle = True
        # We report the line-number where cycle[0] imports cycle[1].
        f = module_to_file.get(cycle[0])
        if f is None:  # means this cycle does not include any files-to-lint
            continue
        import_lines = _get_import_lines(f)
        desired_import_line = next(il for il in import_lines
                                   if il.module == cycle[1])
        yield (f, desired_import_line.lineno,
               ('Resolve this import cycle (by making a late import): %s' %
                ' -> '.join(cycle)))

    if not saw_a_cycle:
        # This is only safe to do when there are no cycles already!
        for error in _lint_unnecessary_late_imports(files_to_lint):
            yield error
Exemplo n.º 39
0
def lint_no_wrong_i18n_markup_in_jinja2(files_to_lint):
    """Find where we mark js within html with i18n._ instead of {{ _js()

    Returns:
       List of triples: (filename, lineno, error message)
    """
    files = lintutil.filter(files_to_lint, prefix='templates/', suffix='.html')
    for filename in files:
        contents = lintutil.file_contents(filename)
        for fn_match in _BAD_JS_MARKUP.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue

            for m in _BAD_JS_MARKUP.finditer(fn_match.group(0)):
                linenum = 1 + contents.count('\n', 0,
                                             fn_match.start() + m.start(1))
                yield (filename, linenum,
                       'Do {{ _js("%s") }} instead of %s inside <script> '
                       'tags within jinja2.' % (m.group(1), m.group(0)))
Exemplo n.º 40
0
def lint_no_wrong_i18n_markup_in_jinja2(files_to_lint):
    """Find where we mark js within html with i18n._ instead of {{ _js()

    Returns:
       List of triples: (filename, lineno, error message)
    """
    files = lintutil.filter(files_to_lint, prefix='templates/', suffix='.html')
    for filename in files:
        contents = lintutil.file_contents(filename)
        for fn_match in _BAD_JS_MARKUP.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue

            for m in _BAD_JS_MARKUP.finditer(fn_match.group(0)):
                linenum = 1 + contents.count('\n', 0,
                                             fn_match.start() + m.start(1))
                yield (filename, linenum,
                       'Do {{ _js("%s") }} instead of %s inside <script> '
                       'tags within jinja2.' % (m.group(1), m.group(0)))
Exemplo n.º 41
0
def lint_all_wsgi_entrypoint_imports(files_to_lint):
    """Enforce that every WSGI entrypoint imports appengine_config first.

    On App Engine Standard, appengine_config is guaranteed to be imported
    before anything else.  But this isn't guaranteed on VM, which is bad
    because our code assumes it!  So we require every WSGI entrypoint to
    manually import it first thing, before any other imports.  (It likely only
    matters that it's before all first-party and shared imports, but we require
    that it be first to keep things simple.  We do allow __future__ imports
    first, since those are easy to check and python wants them first.)
    """
    entrypoint_filenames = set()
    for gae_module_name in modules_util.all_modules:
        entrypoint_module_names = (
            modules_util.app_yaml_entrypoint_modules(gae_module_name))
        for entrypoint_module_name in entrypoint_module_names:
            filename = importlib.import_module(entrypoint_module_name).__file__
            # replace .pyc, .pyo etc.
            filename = os.path.splitext(filename)[0] + '.py'
            entrypoint_filenames.add(ka_root.relpath(filename))

    if lintutil.filter(files_to_lint, suffix='.yaml'):
        # If any yaml files were modified, we need to recheck all entrypoints.
        # TODO(benkraft): We should really only need to do this if any
        # module-yaml, or any yaml included by one, has changed, but
        # determining which those are is tricky, so we assume they all are.
        filenames_to_check = entrypoint_filenames
    else:
        # Otherwise, we just need to check changed entrypoints.
        files_to_lint = set(ka_root.relpath(f) for f in files_to_lint)
        filenames_to_check = entrypoint_filenames & files_to_lint

    for filename in filenames_to_check:
        if filename.startswith('third_party/'):
            continue
        maybe_error = _lint_single_wsgi_entrypoint_import(filename)
        if maybe_error:
            yield maybe_error
Exemplo n.º 42
0
def lint_all_wsgi_entrypoint_imports(files_to_lint):
    """Enforce that every WSGI entrypoint imports appengine_config first.

    On App Engine Standard, appengine_config is guaranteed to be imported
    before anything else.  But this isn't guaranteed on VM, which is bad
    because our code assumes it!  So we require every WSGI entrypoint to
    manually import it first thing, before any other imports.  (It likely only
    matters that it's before all first-party and shared imports, but we require
    that it be first to keep things simple.  We do allow __future__ imports
    first, since those are easy to check and python wants them first.)
    """
    entrypoint_filenames = set()
    for gae_module_name in modules_util.all_modules:
        entrypoint_module_names = (
            modules_util.app_yaml_entrypoint_modules(gae_module_name))
        for entrypoint_module_name in entrypoint_module_names:
            filename = importlib.import_module(entrypoint_module_name).__file__
            # replace .pyc, .pyo etc.
            filename = os.path.splitext(filename)[0] + '.py'
            entrypoint_filenames.add(ka_root.relpath(filename))

    if lintutil.filter(files_to_lint, suffix='.yaml'):
        # If any yaml files were modified, we need to recheck all entrypoints.
        # TODO(benkraft): We should really only need to do this if any
        # module-yaml, or any yaml included by one, has changed, but
        # determining which those are is tricky, so we assume they all are.
        filenames_to_check = entrypoint_filenames
    else:
        # Otherwise, we just need to check changed entrypoints.
        files_to_lint = set(ka_root.relpath(f) for f in files_to_lint)
        filenames_to_check = entrypoint_filenames & files_to_lint

    for filename in filenames_to_check:
        if filename.startswith('third_party/'):
            continue
        maybe_error = _lint_single_wsgi_entrypoint_import(filename)
        if maybe_error:
            yield maybe_error
Exemplo n.º 43
0
def lint_non_literal_i18n_do_not_translate(files_to_lint):
    """Find where we mark html as not needing translation but its non-literal

    We require anything we mark with i18n_do_not_translate to be only a string
    literal. i18n_do_not_translate marks the string as "safe" (that is jinja2
    won't autoescape it) so we need to be extra careful we don't create a
    potential XSS attack vector if we end up marking some variable as
    safe.

    Unless that line is marked with {{# @Nolint #}}, we flag it as a lint
    error.

    Returns:
       List of triples: (filename, lineno, error message)
    """
    # Limit files-to-lint to files under templates/ that are html or txt.
    files = lintutil.filter(files_to_lint,
                            prefix='templates/',
                            suffix=('.html', '.txt'))

    for filename in files:
        contents = lintutil.file_contents(filename)
        for fn_match in _J2_FUNCTION_RE.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue

            for m in _BAD_DO_NOT_TRANSLATE_RE.finditer(fn_match.group(0)):
                linenum = 1 + contents.count('\n', 0,
                                             fn_match.start() + m.start(1))
                yield (filename, linenum,
                       '%s contains something that is not just a '
                       'literal string. Only literal strings can be '
                       'inside i18n_do_not_translate.' % m.group(1))
Exemplo n.º 44
0
def lint_missing_safe_in_jinja2(files_to_lint):
    """Find instances where we translate html but don't mark it |safe.

    We html-escape the output of {{ _("...") }} in html templates, which
    is safe but can cause problems when the text is {{ _("<b>hi</b>") }}.
    In that case, the user needs to do {{ _("<b>hi</b>")|safe }}.

    We detect instances where the user does {{ _("...<..") }} in jinja2
    templates but lacks the |safe afterwards.  Unless that line is marked
    with {{# @Nolint #}}, we flag it as a lint error.

    Returns:
       List of triples: (filename, lineno, error message)
    """
    # Limit files-to-lint to html and txt files under templates/.
    files = lintutil.filter(files_to_lint,
                            prefix='templates/',
                            suffix=('.html', '.txt'))

    for filename in files:
        contents = lintutil.file_contents(filename)
        for fn_match in _J2_FUNCTION_RE.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue

            for m in _GETTEXT_RE.finditer(fn_match.group(0)):
                if _needs_safe(m.group(1), m.group(2)):
                    linenum = 1 + contents.count('\n', 0,
                                                 fn_match.start() + m.start(1))
                    yield (filename, linenum,
                           'Replace %s with _("..."|safe) to avoid escaping'
                           'the tag/attr inside the _().' % m.group(1))
                for km in _KWARG_RE.finditer(m.group(3)):
                    if _needs_safe(km.group(1), km.group(2)):
                        linenum = 1 + contents.count(
                            '\n', 0,
                            fn_match.start() + m.start() + km.start(1))
                        yield (filename, linenum,
                               'Replace %s with _(..., foo="..."|safe) to '
                               'avoid escaping the tag/attr inside foo.' %
                               km.group(1))

            for m in _NGETTEXT_RE.finditer(fn_match.group(0)):
                if _needs_safe(m.group(1), m.group(2)):
                    linenum = 1 + contents.count('\n', 0,
                                                 fn_match.start() + m.start(1))
                    yield (filename, linenum,
                           'Replace %s with ngettext("..."|safe, ...) to '
                           'avoid escaping the tag/attr inside the _().' %
                           m.group(1))
                if _needs_safe(m.group(3), m.group(4)):
                    linenum = 1 + contents.count('\n', 0,
                                                 fn_match.start() + m.start(3))
                    yield (filename, linenum,
                           'Replace %s with ngettext(..., "..."|safe) to '
                           'avoid escaping the tag/attr inside the _().' %
                           m.group(3))
                for km in _KWARG_RE.finditer(m.group(5)):
                    if _needs_safe(km.group(1), km.group(2)):
                        linenum = 1 + contents.count(
                            '\n', 0,
                            fn_match.start() + m.start() + km.start(1))
                        yield (filename, linenum,
                               'Replace %s with ngettext(..., foo="..."|safe)'
                               ' to avoid escaping the tag/attr inside foo.' %
                               km.group(1))
Exemplo n.º 45
0
def lint_not_using_gettext_at_import_time(files_to_lint):
    """Make sure we don't use i18n._/etc in a static context.

    If you have a global variable such as '_FOO = i18n._("bar")', at
    the top of some .py file, it won't work the way you intend because
    i18n._() needs to be called while handling a request in order to
    know what language to translate to.  (Instead, you'd need to do
        _FOO = lambda: i18n._("bar")
    or some such.)

    This tests for this by mocking i18n._ et al., and then importing
    everything (but running nothing).  Any i18n._ calls that happen
    during this import are problematic!  We have to spawn a new
    python process to make sure we do the importing properly (and
    without messing with the currently running python environment!)
    """
    candidate_files_to_lint = lintutil.filter(files_to_lint, suffix='.py')
    files_to_lint = []

    for filename in candidate_files_to_lint:
        contents = lintutil.file_contents(filename)

        # Check that it's plausible this file uses i18n._ or similar.
        # This also avoids importing random third-party files that may
        # have nasty side-effects at import time (all our code is too
        # well-written to do that!)
        if 'import intl' in contents or 'from intl' in contents:
            files_to_lint.append(filename)

    program = """\
import os                # @Nolint(linter can't tell this is in a string!)
import sys               # @Nolint(linter can't tell this is in a string!)
import traceback

import intl.request      # @Nolint(seems unused to our linter but it's used)

_ROOT = "%s"

def add_lint_error(f):
    # We assume code in 'intl' doesn't make this mistake, and thus
    # the first stack-frame before we get into 'intl' is the
    # offending code.  ctx == '<string>' means the error occurred in
    # this pseudo-script.
    for (ctx, lineno, fn, line) in reversed(traceback.extract_stack()):
        if os.path.isabs(ctx):
            ctx = os.path.relpath(ctx, _ROOT)
        if ctx != '<string>' and not ctx.startswith('intl/'):
            if ctx == f:
                print 'GETTEXT ERROR {} {}'.format(ctx, lineno)
            break
    return 'en'     # a fake value for intl.request.ka_locale

""" % ka_root.root

    if not files_to_lint:
        return

    for filename in files_to_lint:
        modulename = ka_root.relpath(filename)
        modulename = os.path.splitext(modulename)[0]  # nix .py
        modulename = modulename.replace('/', '.')
        # Force a re-import.
        program += 'sys.modules.pop("%s", None)\n' % modulename
        program += ('intl.request.ka_locale = lambda: add_lint_error("%s")\n'
                    % ka_root.relpath(filename))
        program += 'import %s\n' % modulename

    p = subprocess.Popen(
        ['env', 'PYTHONPATH=%s' % ':'.join(sys.path),
         sys.executable, '-c', program],
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    p.wait()
    lint_output = p.stdout.read()
    for line in lint_output.splitlines():
        if line.startswith('GETTEXT ERROR '):
            line = line[len('GETTEXT ERROR '):]
            (filename, linenum) = line.rsplit(' ', 1)
            yield (ka_root.join(filename), int(linenum),
                   'Trying to translate at import-time, but '
                   'translation only works at runtime! '
                   'Use intl.i18n.mark_for_translation() instead.')
def lint_banned_file_types(files_to_lint):
    """Find all the bad files using banned extensions and complain."""
    bad_files = lintutil.filter(files_to_lint, suffix=_BAD_EXTENSIONS,
        exclude_substrings=_EXISTING_FILE_WHITELIST)
    for bad_file in bad_files:
        yield (bad_file, 1, "Use React and Aphrodite instead.")
Exemplo n.º 47
0
def lint_missing_safe_in_jinja2(files_to_lint):
    """Find instances where we translate html but don't mark it |safe.

    We html-escape the output of {{ _("...") }} in html templates, which
    is safe but can cause problems when the text is {{ _("<b>hi</b>") }}.
    In that case, the user needs to do {{ _("<b>hi</b>")|safe }}.

    We detect instances where the user does {{ _("...<..") }} in jinja2
    templates but lacks the |safe afterwards.  Unless that line is marked
    with {{# @Nolint #}}, we flag it as a lint error.

    Returns:
       List of triples: (filename, lineno, error message)
    """
    # Limit files-to-lint to html and txt files under templates/.
    files = lintutil.filter(files_to_lint,
                            prefix='templates/', suffix=('.html', '.txt'))

    for filename in files:
        contents = lintutil.file_contents(filename)
        for fn_match in _J2_FUNCTION_RE.finditer(contents):
            # Make sure there's no @Nolint anywhere around this function.
            newline = contents.find('\n', fn_match.end())
            newline = newline if newline > -1 else len(contents)
            if '@Nolint' in contents[fn_match.start():newline]:
                continue

            for m in _GETTEXT_RE.finditer(fn_match.group(0)):
                if _needs_safe(m.group(1), m.group(2)):
                    linenum = 1 + contents.count('\n', 0,
                                                 fn_match.start() + m.start(1))
                    yield (filename, linenum,
                           'Replace %s with _("..."|safe) to avoid escaping'
                           'the tag/attr inside the _().' % m.group(1))
                for km in _KWARG_RE.finditer(m.group(3)):
                    if _needs_safe(km.group(1), km.group(2)):
                        linenum = 1 + contents.count('\n', 0,
                                                     fn_match.start() +
                                                     m.start() + km.start(1))
                        yield (filename, linenum,
                               'Replace %s with _(..., foo="..."|safe) to '
                               'avoid escaping the tag/attr inside foo.'
                               % km.group(1))

            for m in _NGETTEXT_RE.finditer(fn_match.group(0)):
                if _needs_safe(m.group(1), m.group(2)):
                    linenum = 1 + contents.count('\n', 0,
                                                 fn_match.start() + m.start(1))
                    yield (filename, linenum,
                           'Replace %s with ngettext("..."|safe, ...) to '
                           'avoid escaping the tag/attr inside the _().'
                           % m.group(1))
                if _needs_safe(m.group(3), m.group(4)):
                    linenum = 1 + contents.count('\n', 0,
                                                 fn_match.start() + m.start(3))
                    yield (filename, linenum,
                           'Replace %s with ngettext(..., "..."|safe) to '
                           'avoid escaping the tag/attr inside the _().'
                           % m.group(3))
                for km in _KWARG_RE.finditer(m.group(5)):
                    if _needs_safe(km.group(1), km.group(2)):
                        linenum = 1 + contents.count('\n', 0,
                                                     fn_match.start() +
                                                     m.start() + km.start(1))
                        yield (filename, linenum,
                               'Replace %s with ngettext(..., foo="..."|safe)'
                               ' to avoid escaping the tag/attr inside foo.'
                               % km.group(1))
Exemplo n.º 48
0
def lint_static_url_modifier(files_to_lint):
    """Be sure every static url in an .html file uses |static_url or static().

    We serve static urls such as images and fonts from kastatic.org,
    for efficiency.  The |static_url and static_url() jinja2 modifiers are ways
    to rewrite urls to use that.  This linter makes sure people remember to do
    that for new code.
    """
    files = lintutil.filter(files_to_lint, prefix='templates/', suffix='.html')
    if not files:
        return

    # generate_static_urls_regexp matches standalone urls, but we need
    # to match them in the context of a line, so we need to mess with
    # ^ and $ a bit.  We just get rid of $, which could yield false
    # positives in theory but doesn't in practice.  We replace ^ with
    # something that matches 'things you might find before the start
    # of a static url but not before something like an API route name."
    static_url_regexp = url_util.STATIC_URL_REGEX
    static_url_regexp = static_url_regexp.replace('$', '')
    # The '%s' in the regexp below is to catch jinja2 html like:
    #   <img src="{{ "https://%s/images/..." % hostname }}">
    static_url_regexp = static_url_regexp.replace(
        '^/', r'([^\w_./-]["\'/]|khanacademy.org/|%s/)')

    # It's good when the static-url is followed by '|static_url' (it
    # needs to be in a '{{ ... }}' for that) or preceded by 'static_url('.
    # We say the regexp is bad if neither case is present.
    # This is cheaper than using a negative lookahead (I think).
    good_regexp = (r'(static_url\s*\(\s*)?(%s)([^}]*\|static_url)?'
                   % static_url_regexp)

    # We do two checks: one for the static url by name, and one for
    # any src= parameter (or poster=, which is used for videos).
    static_url_re = re.compile(good_regexp)
    # We take advantage of the fact we don't use spaces in our static urls.
    src_regex = re.compile(r'\b(src|poster)=((?:\{[^\}]*\}|[^ >])*)')

    for f in files:
        contents_of_f = lintutil.file_contents(f)
        error_lines = set()   # just to avoid duplicate error messages

        for m in static_url_re.finditer(contents_of_f):
            if not m.groups()[0] and not m.groups()[-1]:
                lineno = 1 + contents_of_f.count('\n', 0, m.start())
                # STATIC_URL_REGEX matches the character before the url-string,
                # and also the opening quote, so get rid of them both.
                url_prefix = m.group(2)[1:].strip('\'\"')
                yield (f, lineno,
                       ('The static url "%s..." should use "|static_url" '
                       + 'after the url or call "static_url()" with the url ' +
                       'passed in') % url_prefix)
                error_lines.add(lineno)

        # emails maybe want to point to ka.org to avoid confusion.
        # TODO(csilvers): should we point them to kastatic.org too?
        if '/emails/' not in f:
            for m in src_regex.finditer(contents_of_f):
                url = m.group(2).strip('\'\"{} ')
                if not url:
                    continue
                # A simple, but effective, effort to find urls that point to
                # non-ka domains.  (Most ka domains we use are .org!)
                if '.com' in url and '.appspot.com' not in url:
                    continue
                if url.startswith('data:'):
                    continue
                # If it's already a url that we serve off the CDN, don't
                # complain.
                if any(u in url for (_, u) in url_util.FASTLY_URL_MAP):
                    continue
                # If it's an <iframe src=...>, don't complain; those are
                # never static resources.  We just do a cheap check.
                end_of_line = contents_of_f.index('\n', m.end())
                if '</iframe>' in contents_of_f[m.end():end_of_line]:
                    continue

                if '|static_url' not in url:
                    lineno = 1 + contents_of_f.count('\n', 0, m.start())
                    if lineno not in error_lines:  # a new error
                        yield (f, lineno,
                               'Use |static_url with "%s" attributes'
                               % m.group(1))