def matches_target_tuples(
    seg: BaseSegment,
    target_tuples: List[Tuple[str, str]],
    parent: Optional[BaseSegment] = None,
):
    """Return True when ``seg`` satisfies at least one of ``target_tuples``.

    Each tuple is read as ``(kind, value)``. Three kinds are examined, in
    this order:

    * ``"raw_upper"``: ``seg.raw_upper`` equals one of the values.
    * ``"type"``: ``seg.is_type`` accepts one of the values.
    * ``"parenttype"``: ``parent.is_type`` accepts one of the values. This is
      only considered for non-empty keyword ``RawSegment`` instances that are
      not meta, comment, templated or whitespace.

    Returns ``False`` when none of the checks succeed.
    """

    def _values_of(kind):
        # Collect the second element of every tuple tagged with ``kind``.
        return [elem[1] for elem in target_tuples if elem[0] == kind]

    if seg.raw_upper in _values_of("raw_upper"):
        return True
    if seg.is_type(*_values_of("type")):
        return True

    # For parent type checks, there's a higher risk of getting an incorrect
    # segment, so a series of guards is applied first. Only keywords are
    # checked this way, as other types can be matched directly rather than
    # via the parent.
    if seg.is_meta or seg.is_comment or seg.is_templated or seg.is_whitespace:
        return False
    if not isinstance(seg, RawSegment) or len(seg.raw) == 0:
        return False
    if not seg.is_type("keyword") or not parent:
        return False
    if parent.is_type(*_values_of("parenttype")):
        # TODO: This clause is much less used post crawler migration.
        # Consider whether this should be removed once that migration
        # is complete.
        return True  # pragma: no cover
    return False
def test__parser__base_segments_stubs():
    """Check that ``BaseSegment.edit`` raises ``NotImplementedError``."""
    templated_file = TemplatedFile.from_string("foobar")
    marker = PositionMarker(slice(0, 6), slice(0, 6), templated_file)
    raw_child = RawSegment("foobar", marker)
    parent_segment = BaseSegment(segments=[raw_child])

    # The base class only provides a stub for edit().
    with pytest.raises(NotImplementedError):
        parent_segment.edit("foo")
def test__parser__base_segments_type():
    """Check class-level type matching through ``class_is_type``."""
    # (segment class, type names) pairs that are expected to match.
    expected_matches = [
        (BaseSegment, ("base",)),
        (DummySegment, ("dummy",)),
        (DummySegment, ("base",)),
        (DummySegment, ("base", "foo", "bar")),
    ]
    # Pairs that are expected NOT to match.
    expected_misses = [
        (BaseSegment, ("foo",)),
        (BaseSegment, ("foo", "bar")),
    ]

    for segment_cls, type_names in expected_matches:
        assert segment_cls.class_is_type(*type_names)
    for segment_cls, type_names in expected_misses:
        assert not segment_cls.class_is_type(*type_names)
def _missing_whitespace(seg: BaseSegment, before=True) -> bool:
    """Decide whether whitespace is missing next to an adjoining segment.

    Starts from the answer given by ``Rule_L006._missing_whitespace`` and
    overrides it to ``False`` for the "after" direction when the adjoining
    segment is a comma, a statement terminator, or a cast expression that
    has a ``casting_operator`` child.

    This avoids flagging for commas after quoted strings.
    https://github.com/sqlfluff/sqlfluff/issues/943
    """
    base_result = Rule_L006._missing_whitespace(seg, before=before)

    # The exemptions only apply when looking *after*, and need a segment.
    if before or not seg:
        return base_result
    if seg.is_type("comma", "statement_terminator"):
        return False
    if seg.is_type("cast_expression") and seg.get_child("casting_operator"):
        return False
    return base_result
def _get_orderby_info(segment: BaseSegment) -> List[OrderByColumnInfo]:
    """Collect one ``OrderByColumnInfo`` per column reference in an ORDER BY.

    The direct children of ``segment`` are scanned in order. After a
    ``column_reference`` child has been seen, the next child whose type is
    not keyword/whitespace/indent/dedent closes that column: it is recorded
    as the ``separator``, together with the name of the most recent
    ``asc``/``desc`` keyword seen since the last reset (or ``None``).
    A column still open at the end uses the clause's last child as separator.

    ``segment`` must be of type ``orderby_clause`` (enforced by assertion).
    """
    assert segment.is_type("orderby_clause")

    ignorable_types = ("keyword", "whitespace", "indent", "dedent")
    columns = []
    column_pending = False
    direction = None

    for child in segment.segments:
        if child.is_type("column_reference"):
            column_pending = True
            continue
        if child.is_type("keyword") and child.name in ("asc", "desc"):
            direction = child.name
            continue
        if column_pending and child.type not in ignorable_types:
            columns.append(OrderByColumnInfo(separator=child, order=direction))
            # Reset findings ready for the next column.
            column_pending = False
            direction = None

    # Special handling for last column: nothing follows it to close it.
    if column_pending:
        columns.append(
            OrderByColumnInfo(separator=segment.segments[-1], order=direction)
        )
    return columns
def _compute_segment_length(cls, segment: BaseSegment) -> int:
    """Return the length this segment contributes to a line.

    Returns ``0`` for newline segments and for segments whose source string
    contains a newline. Otherwise returns the width of the segment's source
    slice, falling back to ``len(segment.raw)`` when that width is zero.
    """
    if segment.is_type("newline"):
        # Generally, we won't see newlines, but if we do, simply ignore
        # them. Rationale: The intent of this rule is to enforce maximum
        # line length, and newlines don't make lines longer.
        return 0

    marker = segment.pos_marker
    assert marker
    if "\n" in marker.source_str():
        # Similarly we shouldn't see newlines in source segments.
        # However for templated loops it's often not possible to
        # accurately calculate the segments. These will be caught by
        # the first iteration of the loop (which is non-templated)
        # so doesn't suffer from the same bug, so we can ignore these.
        return 0

    # Length of this segment in SOURCE space (before template expansion).
    source_span = marker.source_slice
    source_width = source_span.stop - source_span.start

    # If a segment did not originate from the original source, its slice
    # length will be zero. This occurs, for example, when other lint rules
    # add indentation or other whitespace. In that case, fall back to the
    # length of its contents.
    return source_width or len(segment.raw)
def _implicit_column_references(
    cls, segment: BaseSegment
) -> Iterator[BaseSegment]:
    """Yield GROUP BY / ORDER BY clauses that contain numeric literals.

    Walks the tree below ``segment``. ``withingroup_clause`` and
    ``window_specification`` segments are not descended into. When a
    ``groupby_clause`` or ``orderby_clause`` is reached, the clause itself
    is yielded once for every direct child that is a ``numeric_literal``,
    and the clause is not searched any deeper.

    This function was adapted from similar code in L054.
    """
    # Ignore windowing clauses entirely.
    if segment.is_type("withingroup_clause", "window_specification"):
        return

    if segment.is_type("groupby_clause", "orderby_clause"):
        for child in segment.segments:
            if child.is_type("numeric_literal"):
                yield segment
        return

    # Any other segment: keep searching through its children.
    for child in segment.segments:
        yield from cls._implicit_column_references(child)
def crawl_sources(
    self, segment: BaseSegment, recurse_into=True, pop=False
) -> Generator[Union[str, "Query"], None, None]:
    """Find SELECTs, table refs, or value table function calls in segment.

    Crawls ``segment`` for table references, set expressions, select
    statements and values clauses. Nested selects yield the ``query_tree``
    of a new ``SelectCrawler``; table references yield the CTE found by
    ``lookup_cte`` (unqualified references only) and the raw reference
    text. If no nested select was found, the raw text of the segment's
    ``table_expression`` child (if any) is yielded last.
    """
    saw_nested_select = False
    candidates = segment.recursive_crawl(
        "table_reference",
        "set_expression",
        "select_statement",
        "values_clause",
        recurse_into=recurse_into,
    )
    for found in candidates:
        if found is segment:
            # If the starting segment itself matches the list of types we're
            # searching for, recursive_crawl() will return it. Skip that.
            continue

        if not found.is_type("table_reference"):
            assert found.is_type(
                "set_expression", "select_statement", "values_clause"
            )
            saw_nested_select = True
            nested_crawler = SelectCrawler(found, self.dialect, parent=self)
            # We know this will pass because we specified parent=self above.
            assert nested_crawler.query_tree
            yield nested_crawler.query_tree
            continue

        if not found.is_qualified():
            cte = self.lookup_cte(found.raw, pop=pop)
            if cte:
                # It's a CTE.
                yield cte
        # It's an external table.
        # NOTE(review): as laid out here, the raw name is yielded for every
        # table reference, including ones that were just resolved to a CTE.
        # Confirm against the callers that this is intended.
        yield found.raw

    if saw_nested_select:
        return

    # If we reach here, the SELECT may be querying from a value table
    # function, e.g. UNNEST(). For our purposes, this is basically the
    # same as an external table. Return the "table" part as a string.
    table_expr = segment.get_child("table_expression")
    if table_expr:
        yield table_expr.raw
def get_trailing_newlines(segment: BaseSegment) -> List[BaseSegment]:
    """Return the newline segments found at the tail end of the tree.

    Segments are visited via ``recursive_crawl_all(reverse=True)``. Every
    ``newline`` segment encountered is collected. The walk stops at the
    first segment that is neither whitespace nor of type ``dedent`` or
    ``end_of_file``.
    """
    trailing = []
    for node in segment.recursive_crawl_all(reverse=True):
        if node.is_type("newline"):
            trailing.append(node)
        # Whitespace, dedents and the end-of-file marker may sit among the
        # trailing newlines; anything else ends the search.
        if node.is_whitespace or node.is_type("dedent", "end_of_file"):
            continue
        break
    return trailing
def matches_target_tuples(seg: BaseSegment, target_tuples: List[Tuple[str, str]]):
    """Return True when ``seg`` satisfies at least one of ``target_tuples``.

    Each tuple is read as ``(kind, value)``. A ``"name"`` tuple matches when
    ``seg.name`` equals its value; a ``"type"`` tuple matches when
    ``seg.is_type`` accepts its value. Names are checked before types.
    """
    wanted_names = [entry[1] for entry in target_tuples if entry[0] == "name"]
    if seg.name in wanted_names:
        return True

    wanted_types = [entry[1] for entry in target_tuples if entry[0] == "type"]
    return bool(seg.is_type(*wanted_types))
def _choose_anchor_segment(
    root_segment: BaseSegment,
    edit_type: str,
    segment: BaseSegment,
    filter_meta: bool = False,
):
    """Choose the anchor point for a lint fix, i.e. where to apply the fix.

    From a grammar perspective, segments near the leaf of the tree are
    generally less likely to allow general edits such as whitespace
    insertion. This function avoids such issues by taking a proposed
    anchor point (assumed to be near the leaf of the tree) and walking
    "up" the parse tree as long as the ancestor segments have the same
    start or end point (depending on the edit type) as "segment". This
    newly chosen anchor is more likely to be a valid anchor point for
    the fix.

    Args:
        root_segment: Root of the tree; ``root_segment.path_to(segment)``
            supplies the ancestor chain. If falsy, ``segment`` is returned.
        edit_type: Only ``"create_before"`` and ``"create_after"`` trigger
            the walk; any other value returns ``segment`` unchanged.
        segment: The proposed anchor.
        filter_meta: If true, an ancestor's children are also compared with
            meta segments filtered out, before the unfiltered comparison.

    Returns:
        The chosen anchor: ``segment`` itself or one of its ancestors.
    """
    if edit_type not in ("create_before", "create_after"):
        return segment

    anchor: BaseSegment = segment
    child: BaseSegment = segment
    path: Optional[List[BaseSegment]] = (
        root_segment.path_to(segment) if root_segment else None
    )
    # Drop the first and last entries of the path; only the segments in
    # between are candidates to take over as the anchor.
    inner_path: Optional[List[BaseSegment]] = path[1:-1] if path else None
    if inner_path:
        for seg in inner_path[::-1]:
            # Which lists of children to check against.
            children_lists: List[List[BaseSegment]] = []
            if filter_meta:
                # Optionally check against filtered (non-meta only) children.
                children_lists.append(
                    [item for item in seg.segments if not item.is_meta]
                )
            # Always check against the full set of children.
            children_lists.append(seg.segments)
            children: List[BaseSegment]
            for children in children_lists:
                if not children:
                    # The filtered list is empty when every child is a meta
                    # segment. Indexing it would raise IndexError, so fall
                    # through to the next candidate list instead.
                    continue
                if edit_type == "create_before" and children[0] is child:
                    anchor = seg
                    assert anchor.raw.startswith(segment.raw)
                    child = seg
                    break
                elif edit_type == "create_after" and children[-1] is child:
                    anchor = seg
                    assert anchor.raw.endswith(segment.raw)
                    child = seg
                    break
    return anchor
def _(segment: BaseSegment) -> bool:
    """Return the result of ``segment.is_raw()``."""
    raw_flag = segment.is_raw()
    return raw_flag
def _(segment: BaseSegment):
    """Return ``segment.is_name`` applied to the enclosing ``seg_name`` values."""
    name_matches = segment.is_name(*seg_name)
    return name_matches
def _(segment: BaseSegment):
    """Return ``segment.is_type`` applied to the enclosing ``seg_type`` values."""
    type_matches = segment.is_type(*seg_type)
    return type_matches
def _(segment: BaseSegment) -> str:
    """Return the result of ``segment.get_name()``."""
    segment_name = segment.get_name()
    return segment_name