Example #1
0
    def crawl(
        cls,
        segment: BaseSegment,
        queries: Dict[str, List["SelectCrawler"]],
        dialect: Dialect,
        recurse_into=True,
    ) -> Generator[Union[str, List["SelectCrawler"]], None, None]:
        """Find SELECTs, table refs, or value table function calls in segment.

        For each SELECT found, yield a list of SelectCrawlers. For each table
        reference or function call string found, yield it directly.
        """
        found_selects = []
        for match in segment.recursive_crawl("table_reference",
                                             "select_statement",
                                             recurse_into=recurse_into):
            # When starting from a select_statement, recursive_crawl() yields
            # the statement itself. That's not a nested match; skip it.
            if match is segment:
                continue

            if match.type != "table_reference":
                # Only two types are requested above, so anything that isn't
                # a table_reference must be a select_statement.
                assert match.type == "select_statement"
                found_selects.append(SelectCrawler(match, dialect))
                continue

            if match.is_qualified() or match.raw not in queries:
                # An external table: yield its name as a string.
                yield match.raw
            else:
                # It's a CTE.
                # :TRICKY: Pop the CTE from "queries" to help callers avoid
                # infinite recursion. We could make this behavior optional
                # someday, if necessary.
                yield queries.pop(match.raw)
        if not found_selects:
            # No SELECTs found: the query may be reading from a value table
            # function, e.g. UNNEST(). For our purposes this is basically an
            # external table, so yield the "table" part as a string.
            value_table = segment.get_child("table_expression")
            if value_table:
                yield value_table.raw
        yield found_selects
Example #2
0
 def _get_name_if_cte(select_statement: BaseSegment,
                      ancestor_segment: BaseSegment) -> Optional[str]:
     """Return name if CTE. If top-level, return None."""
     # Walk the path from the ancestor down to the select statement and take
     # the first common_table_expression encountered, if any.
     cte = next(
         (seg for seg in ancestor_segment.path_to(select_statement)
          if seg.is_type("common_table_expression")),
         None,
     )
     # The CTE's name is held by its first child segment.
     return cte.segments[0].raw if cte else None
Example #3
0
 def gather(cls, segment: BaseSegment,
            dialect: Dialect) -> Dict[Optional[str], List["SelectCrawler"]]:
     """Find top-level SELECTs and CTEs, return info."""
     # Maps CTE name (or None for a top-level select) to crawler list.
     queries = defaultdict(list)
     # We pass recurse_into=False because we only want top-level select
     # statements and CTEs here. Nested selects are dealt with later as
     # needed, when processing their top-level parent.
     for stmt in segment.recursive_crawl("select_statement",
                                         recurse_into=False):
         name = cls._get_name_if_cte(stmt, segment)
         queries[name].append(SelectCrawler(stmt, dialect))
     return dict(queries)
Example #4
0
    def get(
        cls,
        segment: BaseSegment,
        queries: Dict[str, List["SelectCrawler"]],
        dialect: Dialect,
    ) -> Union[str, List["SelectCrawler"]]:
        """Find SELECTs, table refs, or value table function calls in segment.

        If a SELECT is found, return the info list. Otherwise, return the
        table name or function call string.
        """
        crawlers = []
        for match in segment.recursive_crawl(
            "table_reference", "select_statement", recurse_into=False
        ):
            # When starting from a select_statement, recursive_crawl() yields
            # the statement itself. That's not a nested match; skip it.
            if match is segment:
                continue

            if match.type == "table_reference":
                if match.is_qualified() or match.raw not in queries:
                    # It's an external table: return its raw name.
                    return match.raw
                # It's a CTE: return the crawlers gathered for it.
                return queries[match.raw]
            else:
                # Only two types are requested above.
                assert match.type == "select_statement"
                crawlers.append(SelectCrawler(match, dialect))
        if not crawlers:
            # No SELECTs found: the query may be reading from a value table
            # function, e.g. UNNEST(). For our purposes this is basically an
            # external table, so return the "table" part as a string.
            table_expr = segment.get_child("main_table_expression")
            if table_expr:
                return table_expr.raw
        return crawlers
Example #5
0
 def extract_ignore_mask(
     cls, tree: BaseSegment
 ) -> Tuple[List[NoQaDirective], List[SQLBaseError]]:
     """Look for inline ignore comments and return NoQaDirectives."""
     ignore_buff: List[NoQaDirective] = []
     violations: List[SQLBaseError] = []
     # Only inline comments can carry noqa directives.
     inline_comments = (
         c for c in tree.recursive_crawl("comment")
         if c.name == "inline_comment"
     )
     for comment in inline_comments:
         ignore_entry = cls.extract_ignore_from_comment(comment)
         # A parse error means the directive is malformed: report it as a
         # violation rather than silently applying it.
         if isinstance(ignore_entry, SQLParseError):
             violations.append(ignore_entry)
         elif ignore_entry:
             ignore_buff.append(ignore_entry)
     if ignore_buff:
         linter_logger.info("Parsed noqa directives from file: %r", ignore_buff)
     return ignore_buff, violations
Example #6
0
    def lint_fix_parsed(
        cls,
        tree: BaseSegment,
        config: FluffConfig,
        rule_set: List[BaseRule],
        fix: bool = False,
        fname: Optional[str] = None,
        templated_file: Optional[TemplatedFile] = None,
        formatter: Any = None,
    ) -> Tuple[BaseSegment, List[SQLBaseError], List[NoQaDirective]]:
        """Lint and optionally fix a tree object.

        Returns a tuple of (tree, initial linting errors, noqa directives).
        If fixing hits the loop limit without reaching a stable state, the
        original (pre-fix) tree is returned and fixes are stripped from the
        reported errors so callers treat them as unfixable.
        """
        # Keep track of the linting errors on the very first linter pass. The
        # list of issues output by "lint" and "fix" only includes issues present
        # in the initial SQL code, EXCLUDING any issues that may be created by
        # the fixes themselves.
        initial_linting_errors = []
        # A placeholder for the fixes we had on the previous loop
        last_fixes = None
        # Keep a set of previous versions to catch infinite loops.
        previous_versions: Set[Tuple[str, Tuple[SourceFix,
                                                ...]]] = {(tree.raw, ())}

        # If we are fixing then we want to loop up to the runaway_limit, otherwise just
        # once for linting.
        loop_limit = config.get("runaway_limit") if fix else 1

        # Dispatch the output for the lint header
        if formatter:
            formatter.dispatch_lint_header(fname)

        # Look for comment segments which might indicate lines to ignore.
        if not config.get("disable_noqa"):
            rule_codes = [r.code for r in rule_set]
            ignore_buff, ivs = cls.extract_ignore_mask_tree(tree, rule_codes)
            initial_linting_errors += ivs
        else:
            ignore_buff = []

        save_tree = tree
        # There are two phases of rule running.
        # 1. The main loop is for most rules. These rules are assumed to
        # interact and cause a cascade of fixes requiring multiple passes.
        # These are run the `runaway_limit` number of times (default 10).
        # 2. The post loop is for post-processing rules, not expected to trigger
        # any downstream rules, e.g. capitalization fixes. They are run on the
        # first loop and then twice at the end (once to fix, and once again to
        # check result of fixes), but not in the intervening loops.
        phases = ["main"]
        if fix:
            phases.append("post")
        for phase in phases:
            if len(phases) > 1:
                rules_this_phase = [
                    rule for rule in rule_set if rule.lint_phase == phase
                ]
            else:
                rules_this_phase = rule_set
            for loop in range(loop_limit if phase == "main" else 2):

                def is_first_linter_pass():
                    return phase == phases[0] and loop == 0

                # Additional newlines are to assist in scanning linting loops
                # during debugging.
                linter_logger.info(
                    f"\n\nEntering linter phase {phase}, loop {loop+1}/{loop_limit}\n"
                )
                changed = False

                if is_first_linter_pass():
                    # In order to compute initial_linting_errors correctly, need
                    # to run all rules on the first loop of the main phase.
                    rules_this_phase = rule_set
                progress_bar_crawler = tqdm(
                    rules_this_phase,
                    desc="lint by rules",
                    leave=False,
                    disable=progress_bar_configuration.disable_progress_bar,
                )

                for crawler in progress_bar_crawler:
                    # Performance: After first loop pass, skip rules that don't
                    # do fixes. Any results returned won't be seen by the user
                    # anyway (linting errors ADDED by rules changing SQL, are
                    # not reported back to the user - only initial linting errors),
                    # so there's absolutely no reason to run them.
                    if (fix and not is_first_linter_pass()
                            and not is_fix_compatible(crawler)):
                        continue

                    progress_bar_crawler.set_description(
                        f"rule {crawler.code}")

                    # fixes should be a dict {} with keys edit, delete, create
                    # delete is just a list of segments to delete
                    # edit and create are list of tuples. The first element is
                    # the "anchor", the segment to look for either to edit or to
                    # insert BEFORE. The second is the element to insert or create.
                    linting_errors, _, fixes, _ = crawler.crawl(
                        tree,
                        dialect=config.get("dialect_obj"),
                        fix=fix,
                        templated_file=templated_file,
                        ignore_mask=ignore_buff,
                        fname=fname,
                    )
                    if is_first_linter_pass():
                        initial_linting_errors += linting_errors

                    if fix and fixes:
                        linter_logger.info(
                            f"Applying Fixes [{crawler.code}]: {fixes}")
                        # Do some sanity checks on the fixes before applying.
                        anchor_info = BaseSegment.compute_anchor_edit_info(
                            fixes)
                        if any(not info.is_valid for info in
                               anchor_info.values()):  # pragma: no cover
                            # NOTE: the final fragment must also be an
                            # f-string; without the prefix "{fixes!r}" would
                            # be emitted literally in the message.
                            message = (
                                f"Rule {crawler.code} returned conflicting "
                                "fixes with the same anchor. This is only "
                                "supported for create_before+create_after, so "
                                f"the fixes will not be applied. {fixes!r}")
                            cls._report_conflicting_fixes_same_anchor(message)
                            for lint_result in linting_errors:
                                lint_result.fixes = []
                        elif fixes == last_fixes:  # pragma: no cover
                            # If we generate the same fixes two times in a row,
                            # that means we're in a loop, and we want to stop.
                            # (Fixes should address issues, hence different
                            # and/or fewer fixes next time.)
                            cls._warn_unfixable(crawler.code)
                        else:
                            # This is the happy path. We have fixes, now we want to
                            # apply them.
                            last_fixes = fixes
                            new_tree, _, _ = tree.apply_fixes(
                                config.get("dialect_obj"), crawler.code,
                                anchor_info)
                            # Check for infinite loops. We use a combination of the
                            # fixed templated file and the list of source fixes to
                            # apply.
                            loop_check_tuple = (
                                new_tree.raw,
                                tuple(new_tree.source_fixes),
                            )
                            if loop_check_tuple not in previous_versions:
                                # We've not seen this version of the file so
                                # far. Continue.
                                tree = new_tree
                                previous_versions.add(loop_check_tuple)
                                changed = True
                                continue
                            else:
                                # Applying these fixes took us back to a state
                                # which we've seen before. We're in a loop, so
                                # we want to stop.
                                cls._warn_unfixable(crawler.code)

                if fix and not changed:
                    # We did not change the file. Either the file is clean (no
                    # fixes), or any fixes which are present will take us back
                    # to a previous state.
                    linter_logger.info(
                        f"Fix loop complete for {phase} phase. Stability "
                        f"achieved after {loop}/{loop_limit} loops.")
                    break
            else:
                if fix:
                    # The linter loop hit the limit before reaching a stable point
                    # (i.e. free of lint errors). If this happens, it's usually
                    # because one or more rules produced fixes which did not address
                    # the original issue **or** created new issues.
                    linter_logger.warning(
                        f"Loop limit on fixes reached [{loop_limit}].")

                    # Discard any fixes for the linting errors, since they caused a
                    # loop. IMPORTANT: By doing this, we are telling SQLFluff that
                    # these linting errors are "unfixable". This is important,
                    # because when "sqlfluff fix" encounters unfixable lint errors,
                    # it exits with a "failure" exit code, which is exactly what we
                    # want in this situation. (Reason: Although this is more of an
                    # internal SQLFluff issue, users deserve to know about it,
                    # because it means their file(s) weren't fixed.
                    for violation in initial_linting_errors:
                        if isinstance(violation, SQLLintError):
                            violation.fixes = []

                    # Return the original parse tree, before any fixes were applied.
                    # Reason: When the linter hits the loop limit, the file is often
                    # messy, e.g. some of the fixes were applied repeatedly, possibly
                    # other weird things. We don't want the user to see this junk!
                    return save_tree, initial_linting_errors, ignore_buff

        if config.get("ignore_templated_areas", default=True):
            initial_linting_errors = cls.remove_templated_errors(
                initial_linting_errors)

        return tree, initial_linting_errors, ignore_buff
Example #7
0
def get_select_statement_info(
        segment: BaseSegment,
        dialect: Optional[Dialect],
        early_exit: bool = True) -> Optional[SelectStatementColumnsAndTables]:
    """Analyze a select statement: targets, aliases, etc. Return info.

    Args:
        segment: A "select_statement" segment (asserted below).
        dialect: Dialect passed through to alias extraction; may be None.
        early_exit: If True, return None immediately when the statement has
            no table aliases and no value table function aliases.

    Returns:
        A SelectStatementColumnsAndTables bundle, or None on early exit.
    """
    assert segment.is_type("select_statement")
    table_aliases, value_table_function_aliases = get_aliases_from_select(
        segment, dialect)
    if early_exit and not table_aliases and not value_table_function_aliases:
        return None

    # Iterate through all the references, both in the select clause, but also
    # potential others.
    sc = segment.get_child("select_clause")
    reference_buffer = list(sc.recursive_crawl("object_reference"))
    # Add any wildcard references
    reference_buffer += list(sc.recursive_crawl("wildcard_identifier"))
    for potential_clause in (
            "where_clause",
            "groupby_clause",
            "having_clause",
            "orderby_clause",
    ):
        clause = segment.get_child(potential_clause)
        if clause:
            reference_buffer += list(
                clause.recursive_crawl("object_reference"))
    # PURGE any references which are in nested select statements
    for ref in reference_buffer.copy():
        ref_path = segment.path_to(ref)
        # is it in a subselect? i.e. a select which isn't this one.
        if any(
                seg.is_type("select_statement") and seg is not segment
                for seg in ref_path):
            reference_buffer.remove(ref)

    # Get all select targets.
    # NOTE(review): this re-fetches the "select_clause" child; it looks like
    # `sc` above is the same segment and could be reused — confirm.
    select_targets = segment.get_child("select_clause").get_children(
        "select_clause_element")

    # Get all column aliases, i.e. identifiers inside alias expressions.
    col_aliases = []
    for col_seg in list(sc.recursive_crawl("alias_expression")):
        for seg in col_seg.segments:
            if seg.is_type("identifier"):
                col_aliases.append(seg.raw)

    # Get any columns referred to in a using clause, and extract anything
    # from ON clauses.
    using_cols = []
    fc = segment.get_child("from_clause")
    if fc:
        for join_clause in fc.recursive_crawl("join_clause"):
            # Track whether we are between the brackets of a USING (...)
            # clause while scanning the join clause's direct children.
            in_using_brackets = False
            seen_using = False
            for seg in join_clause.segments:
                if seg.is_type("keyword") and seg.name == "USING":
                    seen_using = True
                elif seg.is_type("join_on_condition"):
                    for on_seg in seg.segments:
                        if on_seg.is_type("expression"):
                            # Deal with expressions
                            # NOTE(review): this crawls `seg` (the whole
                            # join_on_condition) rather than `on_seg`, so if
                            # the condition holds several expression children
                            # the same references are collected once per
                            # child — confirm whether `on_seg` was intended.
                            reference_buffer += list(
                                seg.recursive_crawl("object_reference"))
                elif seen_using and seg.is_type("start_bracket"):
                    in_using_brackets = True
                elif seen_using and seg.is_type("end_bracket"):
                    in_using_brackets = False
                    seen_using = False
                elif in_using_brackets and seg.is_type("identifier"):
                    using_cols.append(seg.raw)

    return SelectStatementColumnsAndTables(
        select_statement=segment,
        table_aliases=table_aliases or [],
        value_table_function_aliases=value_table_function_aliases or [],
        reference_buffer=reference_buffer,
        select_targets=select_targets,
        col_aliases=col_aliases,
        using_cols=using_cols,
    )
Example #8
0
    def lint_fix(
        self,
        tree: BaseSegment,
        config: Optional[FluffConfig] = None,
        fix: bool = False,
        fname: Optional[str] = None,
        templated_file: Optional[TemplatedFile] = None,
    ) -> Tuple[BaseSegment, List[SQLLintError]]:
        """Lint and optionally fix a tree object.

        Args:
            tree: The parsed tree to lint.
            config: Config to use; falls back to this linter's own config.
            fix: If True, loop applying fixes until stable (or limit hit).
            fname: Optional filename, used for reporting only.
            templated_file: Optional templated file for the rules.

        Returns:
            Tuple of (possibly-fixed tree, linting errors from the first
            pass only — errors introduced by fixes are not reported).
        """
        config = config or self.config
        # Keep track of the linting errors
        all_linting_errors = []
        # A placeholder for the fixes we had on the previous loop
        last_fixes = None
        # Keep a set of previous versions to catch infinite loops.
        previous_versions = {tree.raw}

        # If we are fixing then we want to loop up to the runaway_limit, otherwise just once for linting.
        loop_limit = config.get("runaway_limit") if fix else 1

        # Dispatch the output for the lint header
        if self.formatter:
            self.formatter.dispatch_lint_header(fname)

        for loop in range(loop_limit):
            changed = False
            for crawler in self.get_ruleset(config=config):
                # fixes should be a dict {} with keys edit, delete, create
                # delete is just a list of segments to delete
                # edit and create are list of tuples. The first element is the
                # "anchor", the segment to look for either to edit or to insert BEFORE.
                # The second is the element to insert or create.
                linting_errors, _, fixes, _ = crawler.crawl(
                    tree,
                    dialect=config.get("dialect_obj"),
                    fname=fname,
                    templated_file=templated_file,
                )
                all_linting_errors += linting_errors

                if fix and fixes:
                    linter_logger.info(f"Applying Fixes: {fixes}")
                    # Do some sanity checks on the fixes before applying.
                    # Identical fixes twice in a row means the rule isn't
                    # converging — warn and skip rather than loop forever.
                    if fixes == last_fixes:
                        self._warn_unfixable(crawler.code)
                    else:
                        last_fixes = fixes
                        new_tree, _ = tree.apply_fixes(fixes)
                        # Check for infinite loops
                        if new_tree.raw not in previous_versions:
                            # We've not seen this version of the file so far. Continue.
                            tree = new_tree
                            previous_versions.add(tree.raw)
                            changed = True
                            continue
                        else:
                            # Applying these fixes took us back to a state which we've
                            # seen before. Abort.
                            self._warn_unfixable(crawler.code)

            if loop == 0:
                # Keep track of initial errors for reporting.
                # NOTE: loop_limit is always >= 1, so this branch runs at
                # least once and initial_linting_errors is always bound.
                initial_linting_errors = all_linting_errors.copy()

            if fix and not changed:
                # We did not change the file. Either the file is clean (no fixes), or
                # any fixes which are present will take us back to a previous state.
                linter_logger.info(
                    f"Fix loop complete. Stability achieved after {loop}/{loop_limit} loops."
                )
                break
        if fix and loop + 1 == loop_limit:
            linter_logger.warning(
                f"Loop limit on fixes reached [{loop_limit}].")

        if config.get("ignore_templated_areas", default=True):
            initial_linting_errors = self.remove_templated_errors(
                initial_linting_errors)

        return tree, initial_linting_errors
Example #9
0
File: L028.py  Project: sqlfluff/sqlfluff
def _validate_one_reference(
    single_table_references: str,
    ref: BaseSegment,
    this_ref_type: str,
    standalone_aliases: List[str],
    table_ref_str: str,
    table_ref_str_source: Optional[BaseSegment],
    col_alias_names: List[str],
    seen_ref_types: Set[str],
    fixable: bool,
) -> Optional[LintResult]:
    """Check a single reference against the configured reference policy."""
    # Unqualified wildcard references (i.e. *) shouldn't count; skip them.
    if not ref.is_qualified() and ref.is_type(
            "wildcard_identifier"):  # type: ignore
        return None
    # Oddball case: Column aliases provided via function calls in by
    # FROM or JOIN. References to these don't need to be qualified.
    # Note there could be a table with a column by the same name as
    # this alias, so avoid bogus warnings by just skipping them
    # entirely rather than trying to enforce anything.
    if ref.raw in standalone_aliases:
        return None

    # Oddball case: tsql table variables can't be used to qualify references.
    # This appears here as an empty string for table_ref_str.
    if not table_ref_str:
        return None

    # Certain dialects allow use of SELECT alias in WHERE clauses
    if ref.raw in col_alias_names:
        return None

    if single_table_references == "consistent":
        # "consistent" mode: only flag when this reference's type differs
        # from the types already seen.
        if not seen_ref_types or this_ref_type in seen_ref_types:
            return None
        return LintResult(
            anchor=ref,
            description=f"{this_ref_type.capitalize()} reference "
            f"{ref.raw!r} found in single table select which is "
            "inconsistent with previous references.",
        )

    if single_table_references == this_ref_type:
        # Matches the configured policy: nothing to report.
        return None

    # Shared description for both policy-violation results below.
    violation_description = (
        "{} reference {!r} found in single table "
        "select.".format(this_ref_type.capitalize(), ref.raw)
    )

    if single_table_references == "unqualified":
        # If this is qualified, the first two segments must be a "table"
        # identifier plus a "."; deleting both unqualifies the reference.
        delete_fixes = (
            [LintFix.delete(el) for el in ref.segments[:2]]
            if fixable else None
        )
        return LintResult(
            anchor=ref,
            fixes=delete_fixes,
            description=violation_description,
        )

    # Policy is "qualified": prefix the reference with "<table>." if we can.
    create_fixes = None
    if fixable:
        create_fixes = [
            LintFix.create_before(
                ref.segments[0] if len(ref.segments) else ref,
                source=[table_ref_str_source]
                if table_ref_str_source else None,
                edit_segments=[
                    IdentifierSegment(
                        raw=table_ref_str,
                        type="naked_identifier",
                    ),
                    SymbolSegment(raw=".", type="symbol"),
                ],
            )
        ]
    return LintResult(
        anchor=ref,
        fixes=create_fixes,
        description=violation_description,
    )
Example #10
0
File: linter.py  Project: sti0/sqlfluff
    def lint_fix_parsed(
        cls,
        tree: BaseSegment,
        config: FluffConfig,
        rule_set: List[BaseRule],
        fix: bool = False,
        fname: Optional[str] = None,
        templated_file: Optional[TemplatedFile] = None,
        formatter: Any = None,
    ) -> Tuple[BaseSegment, List[SQLBaseError], List[NoQaDirective]]:
        """Lint and optionally fix a tree object.

        Returns a tuple of (tree, initial linting errors, noqa directives).
        If fixing hits the loop limit without reaching a stable state, the
        original (pre-fix) tree is returned and fixes are stripped from the
        reported errors.
        """
        # Keep track of the linting errors
        all_linting_errors = []
        # A placeholder for the fixes we had on the previous loop
        last_fixes = None
        # Keep a set of previous versions to catch infinite loops.
        previous_versions = {tree.raw}

        # If we are fixing then we want to loop up to the runaway_limit, otherwise just
        # once for linting.
        loop_limit = config.get("runaway_limit") if fix else 1

        # Dispatch the output for the lint header
        if formatter:
            formatter.dispatch_lint_header(fname)

        # Look for comment segments which might indicate lines to ignore.
        if not config.get("disable_noqa"):
            rule_codes = [r.code for r in rule_set]
            ignore_buff, ivs = cls.extract_ignore_mask_tree(tree, rule_codes)
            all_linting_errors += ivs
        else:
            ignore_buff = []

        # Preserve the pre-fix tree so we can return it if fixing runs away.
        save_tree = tree
        for loop in range(loop_limit):
            changed = False

            progress_bar_crawler = tqdm(
                rule_set,
                desc="lint by rules",
                leave=False,
                disable=progress_bar_configuration.disable_progress_bar,
            )

            for crawler in progress_bar_crawler:
                progress_bar_crawler.set_description(f"rule {crawler.code}")

                # fixes should be a dict {} with keys edit, delete, create
                # delete is just a list of segments to delete
                # edit and create are list of tuples. The first element is the
                # "anchor", the segment to look for either to edit or to insert BEFORE.
                # The second is the element to insert or create.
                linting_errors, _, fixes, _ = crawler.crawl(
                    tree,
                    ignore_mask=ignore_buff,
                    dialect=config.get("dialect_obj"),
                    fname=fname,
                    templated_file=templated_file,
                )
                all_linting_errors += linting_errors

                if fix and fixes:
                    linter_logger.info(
                        f"Applying Fixes [{crawler.code}]: {fixes}")
                    # Do some sanity checks on the fixes before applying.
                    # Identical fixes twice in a row means the rule isn't
                    # converging — warn and skip rather than loop forever.
                    if fixes == last_fixes:  # pragma: no cover
                        cls._warn_unfixable(crawler.code)
                    else:
                        last_fixes = fixes
                        new_tree, _ = tree.apply_fixes(
                            config.get("dialect_obj"), fixes)
                        # Check for infinite loops
                        if new_tree.raw not in previous_versions:
                            # We've not seen this version of the file so far. Continue.
                            tree = new_tree
                            previous_versions.add(tree.raw)
                            changed = True
                            continue
                        else:
                            # Applying these fixes took us back to a state which we've
                            # seen before. Abort.
                            cls._warn_unfixable(crawler.code)

            if loop == 0:
                # Keep track of initial errors for reporting.
                initial_linting_errors = all_linting_errors.copy()

            if fix and not changed:
                # We did not change the file. Either the file is clean (no fixes), or
                # any fixes which are present will take us back to a previous state.
                linter_logger.info(
                    f"Fix loop complete. Stability achieved after {loop}/{loop_limit} "
                    "loops.")
                break
        else:
            # for/else: runs only when the loop completed without `break`,
            # i.e. we hit the loop limit while still making changes.
            # NOTE(review): if runaway_limit were ever configured as 0, the
            # loop body never runs and initial_linting_errors below would be
            # unbound — presumably the config guarantees a positive limit;
            # confirm.
            if fix:
                # The linter loop hit the limit before reaching a stable point
                # (i.e. free of lint errors). If this happens, it's usually
                # because one or more rules produced fixes which did not address
                # the original issue **or** created new issues.
                linter_logger.warning(
                    f"Loop limit on fixes reached [{loop_limit}].")

                # Discard any fixes for the linting errors, since they caused a
                # loop. IMPORTANT: By doing this, we are telling SQLFluff that
                # these linting errors are "unfixable". This is important,
                # because when "sqlfluff fix" encounters unfixable lint errors,
                # it exits with a "failure" exit code, which is exactly what we
                # want in this situation. (Reason: Although this is more of an
                # internal SQLFluff issue, users deserve to know about it,
                # because it means their file(s) weren't fixed.
                for violation in initial_linting_errors:
                    if isinstance(violation, SQLLintError):
                        violation.fixes = []

                # Return the original parse tree, before any fixes were applied.
                # Reason: When the linter hits the loop limit, the file is often
                # messy, e.g. some of the fixes were applied repeatedly, possibly
                # other weird things. We don't want the user to see this junk!
                return save_tree, initial_linting_errors, ignore_buff

        if config.get("ignore_templated_areas", default=True):
            initial_linting_errors = cls.remove_templated_errors(
                initial_linting_errors)

        return tree, initial_linting_errors, ignore_buff
Example #11
0
 def is_self_match(self, segment: BaseSegment) -> bool:
     """Does this segment match the relevant criteria."""
     # Unpack the configured type tuple into a single is_type() check.
     matched_types = self.types
     return segment.is_type(*matched_types)
Example #12
0
 def passes_filter(self, segment: BaseSegment):
     """Returns true if this segment considered at all."""
     # Unparsable segments are only considered when the rule explicitly
     # opts in via works_on_unparsable.
     unparsable = segment.is_type("unparsable")
     return self.works_on_unparsable or not unparsable