def _get_segment_move_context(
    self, target_segment: RawSegment, parent_segment: BaseSegment
) -> SegmentMoveContext:
    """Gather the information needed to relocate ``target_segment``.

    The raw segments of ``parent_segment`` are searched in reverse order,
    using ``target_segment`` as the starting point, to find the end of the
    preceding statement.

    Args:
        target_segment: The raw segment that is to be moved.
        parent_segment: The segment whose raw segments are searched.

    Returns:
        A ``SegmentMoveContext`` built from, in order: the anchor segment,
        the one-line-statement flag, the non-meta segments found before
        the preceding code, and the whitespace among them that can be
        deleted.
    """
    raw_stack_backwards = Segments(*parent_segment.raw_segments).reversed()

    # The run of non-code segments met while walking back from the target.
    non_code_run = raw_stack_backwards.select(
        loop_while=sp.not_(sp.is_code()),
        start_seg=target_segment,
    )
    before_segment = non_code_run.select(sp.not_(sp.is_meta()))

    # The selection comes from the raw stack, so its elements are
    # RawSegment instances; the cast only informs the type checker.
    if non_code_run:
        anchor_segment = cast(RawSegment, non_code_run[-1])
    else:
        anchor_segment = target_segment

    preceding_code = raw_stack_backwards.select(
        sp.is_code(),
        start_seg=target_segment,
    ).first()
    self.logger.debug("Semicolon: first_code: %s", preceding_code)

    is_one_line = False
    if preceding_code:
        is_one_line = self._is_one_line_statement(
            parent_segment, preceding_code[0]
        )

    # Whitespace between the segment and the preceding code/comment can be
    # tidied up. Comment spacing/placement is deliberately left alone.
    whitespace_deletions = before_segment.select(
        loop_while=sp.is_whitespace()
    )

    return SegmentMoveContext(
        anchor_segment,
        is_one_line,
        before_segment,
        whitespace_deletions,
    )
def _eval(self, context: RuleContext) -> Optional[LintResult]:
    """Flag an ``ELSE NULL`` inside a case expression and offer a fix.

    Steps:

    1. Take the children of the case expression under evaluation.
    2. Find its first ``else_clause`` child.
    3. Check whether that clause has a child whose upper-cased raw text
       is exactly ``NULL``.
    4. If so, mark the else clause for deletion, together with the
       whitespace/newline/meta segments immediately preceding it.
    5. Otherwise the rule passes.

    Returns:
        A ``LintResult`` anchored on the case expression with the delete
        fixes, or ``None`` when there is nothing to report.
    """
    assert context.segment.is_type("case_expression")

    case_children = FunctionalContext(context).segment.children()
    else_clause = case_children.first(sp.is_type("else_clause"))
    if not else_clause:
        return None

    # NOTE: Comparing against "NULL" directly is safe here, as an
    # expression would *contain* NULL but not be == NULL.
    null_children = else_clause.children(
        lambda child: child.raw_upper == "NULL"
    )
    if not null_children:
        return None

    # :TRICKY: reversed() makes select() effectively search backwards from
    # the ELSE, collecting the indents/whitespace/meta that precede it.
    is_blank_or_meta = sp.or_(
        sp.is_name("whitespace", "newline"),
        sp.is_meta(),
    )
    preceding_blanks = case_children.reversed().select(
        start_seg=else_clause[0],
        loop_while=is_blank_or_meta,
    )

    fixes = [LintFix.delete(else_clause[0])]
    fixes.extend(LintFix.delete(seg) for seg in preceding_blanks)
    return LintResult(anchor=context.segment, fixes=fixes)
def _get_subsequent_whitespace(
    self,
    context,
) -> Tuple[Optional[BaseSegment], Optional[BaseSegment]]:
    """Search forwards through the raw segments for subsequent whitespace.

    Args:
        context: The rule context; ``context.segment`` is the segment the
            search starts from.

    Returns:
        A 2-tuple of:

        * the last whitespace segment in the run of whitespace/meta
          segments selected from ``context.segment``, or ``None`` if that
          run contains no whitespace;
        * the raw segment located just past that run, or ``None`` if the
          computed position is beyond the end of the raw segments.
    """
    # "raw_segments" is appropriate because the only segments we care
    # about are comma, whitespace, newline and comment, which are all raw.
    # Using the raw segments also copes with unexpected parse tree
    # structures resulting from other rule fixes.
    raw_segments = FunctionalContext(context).raw_segments

    # Start from the current segment and take all the whitespace (and
    # meta) segments that the selection yields.
    following_segments = raw_segments.select(
        loop_while=sp.or_(sp.is_meta(), sp.is_type("whitespace")),
        start_seg=context.segment,
    )
    subsequent_whitespace = following_segments.last(sp.is_type("whitespace"))

    # Unwrap the selection once so that every return path yields a single
    # segment (or None), never the selection container itself.
    trailing_whitespace = (
        subsequent_whitespace[0] if subsequent_whitespace else None
    )

    next_idx = (
        raw_segments.index(context.segment) + len(following_segments) + 1
    )
    try:
        first_non_whitespace = raw_segments[next_idx]
    except IndexError:
        # It's all whitespace (or nothing) to the end of the file. This
        # can only happen in bigquery (see
        # test_pass_bigquery_trailing_comma).
        first_non_whitespace = None

    return trailing_whitespace, first_non_whitespace
def _get_indexes(context: RuleContext):
    """Collect positions of key children of the segment under evaluation.

    Indexes are positions within the children of ``context.segment`` as
    reported by ``children.find``; ``-1`` is used as the default when the
    corresponding lookup is skipped.

    Returns:
        A ``SelectTargetsInfo`` holding, in order: the index of the
        ``select`` keyword, the index of the first newline, the index of
        the first select target, the index of the first whitespace after
        the first newline, the index of a comment found between the
        ``select`` keyword and the first newline, the select target
        segments, the first ``from_clause`` among the following siblings,
        and the list of whitespace siblings selected up to that from
        clause.
    """
    children = FunctionalContext(context).segment.children()

    select_targets = children.select(sp.is_type("select_clause_element"))
    first_select_target_idx = children.find(select_targets.get())

    selects = children.select(sp.is_keyword("select"))
    if selects:
        select_idx = children.find(selects.get())
    else:
        select_idx = -1

    newlines = children.select(sp.is_type("newline"))
    if newlines:
        first_new_line_idx = children.find(newlines.get())
    else:
        first_new_line_idx = -1

    # Look for a comment sitting between SELECT and the first newline,
    # allowing only comments, whitespace and meta segments on the way.
    comment_after_select_idx = -1
    if newlines:
        comment_after_select = children.select(
            sp.is_type("comment"),
            start_seg=selects.get(),
            stop_seg=newlines.get(),
            loop_while=sp.or_(
                sp.is_type("comment"),
                sp.is_type("whitespace"),
                sp.is_meta(),
            ),
        )
        if comment_after_select:
            comment_after_select_idx = children.find(
                comment_after_select.get()
            )

    first_whitespace_idx = -1
    if first_new_line_idx != -1:
        # TRICKY: Ignore whitespace prior to the first newline, e.g. if
        # the line with "SELECT" (before any select targets) has trailing
        # whitespace.
        whitespace_after_first_line = children.select(
            sp.is_type("whitespace"),
            start_seg=children[first_new_line_idx],
        )
        first_whitespace_idx = children.find(
            whitespace_after_first_line.get()
        )

    siblings_post = FunctionalContext(context).siblings_post
    from_clauses = siblings_post.first(sp.is_type("from_clause"))
    from_segment = from_clauses.first().get()
    pre_from_whitespace = siblings_post.select(
        sp.is_type("whitespace"),
        stop_seg=from_segment,
    )

    return SelectTargetsInfo(
        select_idx,
        first_new_line_idx,
        first_select_target_idx,
        first_whitespace_idx,
        comment_after_select_idx,
        select_targets,
        from_segment,
        list(pre_from_whitespace),
    )
def _report_unused_alias(cls, alias: AliasInfo) -> LintResult:
    """Build the lint result for an alias that is never used.

    Args:
        alias: Information about the unused alias.

    Returns:
        A ``LintResult`` anchored on ``alias.segment`` whose fixes delete
        the alias expression and the whitespace/meta segments immediately
        preceding it within its from-expression element.
    """
    fixes = [LintFix.delete(alias.alias_expression)]  # type: ignore

    # Walk back from the alias expression to remove indents/whitespace,
    # stopping once another, "regular" segment is reached.
    siblings_backwards = Segments(
        *alias.from_expression_element.segments
    ).reversed()
    leading_blanks = siblings_backwards.select(
        start_seg=alias.alias_expression,
        loop_while=sp.or_(sp.is_whitespace(), sp.is_meta()),
    )
    fixes.extend(LintFix.delete(seg) for seg in leading_blanks)

    description = "Alias {!r} is never used in SELECT statement.".format(
        alias.ref_str
    )
    return LintResult(
        anchor=alias.segment,
        description=description,
        fixes=fixes,
    )
def _eval(self, context: RuleContext) -> Optional[LintResult]:
    """Files must not begin with newlines or whitespace.

    Raw segments are crawled in order. Leading newline/whitespace/indent/
    dedent segments are collected until the first other raw segment is
    met; at that point the collected segments are either reported for
    deletion or the rule passes.

    Returns:
        A ``LintResult`` anchored on ``context.segment`` with one delete
        fix per collected segment, or ``None`` when nothing is reported.
    """
    whitespace_types = {"newline", "whitespace", "indent", "dedent"}
    leading = []

    # Only raw segments are checked. This ensures we don't try and delete
    # the same whitespace multiple times (i.e. for non-raw segments higher
    # in the tree).
    for seg in context.segment.recursive_crawl_all():
        if not seg.is_raw():
            continue
        if seg.is_type(*whitespace_types):
            leading.append(seg)
            continue

        # First non-whitespace raw segment: decide now, one way or the
        # other. Nothing past this point is inspected.
        segment = Segments(seg)
        raw_stack = Segments(*leading, templated_file=context.templated_file)

        # Nothing but meta segments (or nothing at all) was collected.
        if raw_stack.all(sp.is_meta()):
            return None
        # Not a leaf of the parse tree.
        if segment.all(sp.is_expandable()):
            return None
        # It is possible that a template segment (e.g.
        # {{ config(materialized='view') }}) renders to an empty string
        # and as such is omitted from the parsed tree. If a templated raw
        # slice intersects with the source slices in the raw stack, skip
        # this rule to avoid risking collisions with template objects.
        if raw_stack.raw_slices.any(rsp.is_slice_type("templated")):
            return None

        return LintResult(
            anchor=context.segment,
            fixes=[LintFix.delete(d) for d in raw_stack],
        )

    return None
def _eval(self, context: RuleContext) -> Optional[LintResult]:
    """Find rule violations and provide fixes.

    Applies to ``COUNT`` calls with a single argument. The preferred
    argument is ``1`` when ``prefer_count_1`` is set, else ``0`` when
    ``prefer_count_0`` is set, else ``*``. A ``*`` argument is replaced
    when one of the two preferences is set; a lone ``0``/``1`` literal is
    replaced when it differs from the preferred argument.

    Returns:
        A ``LintResult`` anchored on ``context.segment`` carrying a single
        replace fix, or ``None`` when there is nothing to report.
    """
    # Config type hints
    self.prefer_count_0: bool
    self.prefer_count_1: bool

    # We already know we're in a function because of the crawl_behaviour
    function_name = context.segment.get_child("function_name")
    if function_name.raw_upper != "COUNT":
        return None

    # Get bracketed content, ignoring metas, brackets and blank segments.
    is_argument = sp.and_(
        sp.not_(sp.is_meta()),
        sp.not_(
            sp.is_type(
                "start_bracket", "end_bracket", "whitespace", "newline"
            )
        ),
    )
    bracketed = FunctionalContext(context).segment.children(
        sp.is_type("bracketed")
    )
    f_content = bracketed.children(is_argument)

    if len(f_content) != 1:  # pragma: no cover
        return None

    if self.prefer_count_1:
        preferred = "1"
    elif self.prefer_count_0:
        preferred = "0"
    else:
        preferred = "*"

    argument = f_content[0]

    if argument.is_type("star") and (
        self.prefer_count_1 or self.prefer_count_0
    ):
        replacement = argument.edit(argument.raw.replace("*", preferred))
        return LintResult(
            anchor=context.segment,
            fixes=[LintFix.replace(argument, [replacement])],
        )

    if not argument.is_type("expression"):
        return None

    expression_content = [
        seg for seg in argument.segments if not seg.is_meta
    ]
    if len(expression_content) != 1:
        return None

    literal = expression_content[0]
    if (
        literal.is_type("literal")
        and literal.raw in ["0", "1"]
        and literal.raw != preferred
    ):
        # The raw text is exactly "0" or "1" here, so replacing it
        # wholesale is the same as using the preferred text directly.
        return LintResult(
            anchor=context.segment,
            fixes=[LintFix.replace(literal, [literal.edit(preferred)])],
        )

    return None
def _eval(self, context: RuleContext) -> Optional[LintResult]:
    """Select clause modifiers must appear on same line as SELECT.

    When a newline separates the first child of the select clause from
    its ``select_clause_modifier``, fixes are produced that re-create the
    modifier directly after that first child and delete the original
    modifier together with the surrounding newlines/whitespace.

    Returns:
        A ``LintResult`` anchored on ``context.segment`` with the fixes,
        or ``None`` when there is no modifier or it is already on the
        same line.
    """
    # We only care about select_clause.
    assert context.segment.is_type("select_clause")

    # Children of the select_clause; the first one is taken as the select
    # keyword.
    child_segments = FunctionalContext(context).segment.children()
    select_keyword = child_segments[0]

    # Rule doesn't apply if there's no select clause modifier.
    modifier_matches = child_segments.first(
        sp.is_type("select_clause_modifier")
    )
    if not modifier_matches:
        return None
    select_clause_modifier = modifier_matches[0]

    blank_or_meta = sp.or_(sp.is_whitespace(), sp.is_meta())

    # Newlines between the select keyword and the select clause modifier.
    # If there are none, the modifier is already on the same line as the
    # select keyword and the rule doesn't apply.
    leading_newline_segments = child_segments.select(
        select_if=sp.is_type("newline"),
        loop_while=blank_or_meta,
        start_seg=select_keyword,
    )
    if not leading_newline_segments:
        return None

    # Whitespace before the select clause modifier, which may need to be
    # removed during the lint fix.
    leading_whitespace_segments = child_segments.select(
        select_if=sp.is_type("whitespace"),
        loop_while=blank_or_meta,
        start_seg=select_keyword,
    )

    # Is the following select clause element on the same line as the
    # select clause modifier?
    trailing_newline_segments = child_segments.select(
        select_if=sp.is_type("newline"),
        loop_while=blank_or_meta,
        start_seg=select_clause_modifier,
    )

    # These segments are inserted directly after the select keyword.
    edit_segments = [WhitespaceSegment(), select_clause_modifier]
    if not trailing_newline_segments:
        # The first select clause element is on the same line as the
        # select clause modifier, so also insert a newline.
        edit_segments.append(NewlineSegment())

    # Move select clause modifier after select keyword.
    fixes = [
        LintFix.create_after(
            anchor_segment=select_keyword,
            edit_segments=edit_segments,
        )
    ]

    # Delete the original newlines between the select keyword and the
    # select clause modifier. When a newline follows the modifier, the
    # whitespace between keyword and modifier is deleted as well.
    if trailing_newline_segments:
        leading_to_delete = (
            leading_newline_segments + leading_whitespace_segments
        )
    else:
        leading_to_delete = leading_newline_segments
    fixes.extend(LintFix.delete(s) for s in leading_to_delete)

    # Delete the original select clause modifier.
    fixes.append(LintFix.delete(select_clause_modifier))

    # If there is whitespace (on the same line) after the select clause
    # modifier then also delete this.
    trailing_whitespace_segments = child_segments.select(
        select_if=sp.is_whitespace(),
        loop_while=sp.or_(sp.is_type("whitespace"), sp.is_meta()),
        start_seg=select_clause_modifier,
    )
    fixes.extend(LintFix.delete(s) for s in trailing_whitespace_segments)

    return LintResult(
        anchor=context.segment,
        fixes=fixes,
    )