def test__parser__grammar_sequence(seg_list, caplog):
    """Test the Sequence grammar.

    Checks that a two-keyword Sequence matches across whitespace by
    default, that ``allow_gaps=False`` prevents that, and that matching
    fails when not starting at the beginning of the segment list.
    """
    bs = StringParser("bar", KeywordSegment)
    fs = StringParser("foo", KeywordSegment)
    g = Sequence(bs, fs)
    # If running in the test environment, assert that Sequence recognises this
    if getenv("SQLFLUFF_TESTENV", ""):
        assert g.test_env
    gc = Sequence(bs, fs, allow_gaps=False)
    with RootParseContext(dialect=None) as ctx:
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            # Should be able to match the list using the normal matcher
            logging.info("#### TEST 1")
            m = g.match(seg_list, parse_context=ctx)
            assert m
            assert len(m) == 3
            assert m.matched_segments == (
                KeywordSegment("bar", seg_list[0].pos_marker),
                seg_list[1],  # This will be the whitespace segment
                KeywordSegment("foo", seg_list[2].pos_marker),
            )
            # Shouldn't with the allow_gaps matcher
            logging.info("#### TEST 2")
            assert not gc.match(seg_list, parse_context=ctx)
            # Shouldn't match even on the normal one if we don't start
            # at the beginning.
            # BUGFIX: this marker was a duplicate "#### TEST 2", which made
            # the captured debug log ambiguous between the two cases.
            logging.info("#### TEST 3")
            assert not g.match(seg_list[1:], parse_context=ctx)
def test__parser__grammar_sequence_indent_conditional(seg_list, caplog):
    """Test the Sequence grammar with conditional indents."""
    bar_parser = StringParser("bar", KeywordSegment)
    foo_parser = StringParser("foo", KeywordSegment)
    # We will assume the default config has indented_joins = False.
    # We're testing without explicitly setting the `config_type` because
    # that's the assumed way of using the grammar in practice.
    grammar = Sequence(
        Conditional(Indent, indented_joins=False),
        bar_parser,
        Conditional(Indent, indented_joins=True),
        foo_parser,
    )
    with RootParseContext(dialect=None) as ctx:
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            result = grammar.match(seg_list, parse_context=ctx)
            assert result
            segments = result.matched_segments
            # The first conditional should produce an Indent.
            assert isinstance(segments[0], Indent)
            assert isinstance(segments[1], KeywordSegment)
            # The whitespace between the keywords is still there.
            assert isinstance(segments[2], WhitespaceSegment)
            # The second conditional should NOT produce an Indent.
            assert not isinstance(segments[3], Indent)
            assert isinstance(segments[3], KeywordSegment)
def test__parser__grammar__base__bracket_sensitive_look_ahead_match(
    bracket_seg_list, fresh_ansi_dialect
):
    """Test the _bracket_sensitive_look_ahead_match method of the BaseGrammar."""
    bar_parser = StringParser("bar", KeywordSegment)
    foo_parser = StringParser("foo", KeywordSegment)
    # Bracket matching requires a dialect.
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        # Basic version: "bar" should be found first.
        pre_section, match, matcher = BaseGrammar._bracket_sensitive_look_ahead_match(
            bracket_seg_list, [foo_parser, bar_parser], ctx
        )
        assert pre_section == ()
        assert matcher == bar_parser
        # NB: the middle element is a match object.
        assert match.matched_segments == (
            KeywordSegment("bar", bracket_seg_list[0].pos_marker),
        )
        # Looking ahead for "foo" should find the one AFTER the brackets,
        # not the one IN the brackets.
        pre_section, match, matcher = BaseGrammar._bracket_sensitive_look_ahead_match(
            bracket_seg_list, [foo_parser], ctx
        )
        # NB: The bracket segments will have been mutated, so we can't
        # directly compare them.
        assert len(pre_section) == 8
        assert matcher == foo_parser
        # The whitespace shouldn't be matched along with the keyword.
        assert match.matched_segments == (
            KeywordSegment("foo", bracket_seg_list[8].pos_marker),
        )
def test__parser__grammar_oneof_exclude(seg_list):
    """Test the OneOf grammar exclude option."""
    bar_parser = StringParser("bar", KeywordSegment)
    foo_parser = StringParser("foo", KeywordSegment)
    grammar = OneOf(bar_parser, exclude=Sequence(bar_parser, foo_parser))
    with RootParseContext(dialect=None) as ctx:
        # Against the first segment alone it should match...
        assert grammar.match(seg_list[:1], parse_context=ctx)
        # ...but not once the excluded sequence is present.
        assert not grammar.match(seg_list, parse_context=ctx)
def test__parser__grammar_startswith_b(
    include_terminator, match_length, seg_list, fresh_ansi_dialect, caplog
):
    """Test the StartsWith grammar with a terminator (included & excluded)."""
    baar_parser = StringParser("baar", KeywordSegment)
    bar_parser = StringParser("bar", KeywordSegment)
    grammar = StartsWith(
        bar_parser,
        terminator=baar_parser,
        include_terminator=include_terminator,
    )
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            match = grammar.match(seg_list, parse_context=ctx)
            # The parametrised match_length tells us how much should match.
            assert len(match) == match_length
def test__parser__grammar_sequence_indent(seg_list, caplog):
    """Test the Sequence grammar with indents."""
    bar_parser = StringParser("bar", KeywordSegment)
    foo_parser = StringParser("foo", KeywordSegment)
    grammar = Sequence(Indent, bar_parser, foo_parser)
    with RootParseContext(dialect=None) as ctx:
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            match = grammar.match(seg_list, parse_context=ctx)
            assert match
            # The leading Indent should appear in the match, followed
            # by the first keyword.
            assert isinstance(match.matched_segments[0], Indent)
            assert isinstance(match.matched_segments[1], KeywordSegment)
def test__parser__grammar_oneof_templated(seg_list):
    """Test the OneOf grammar.

    NB: Should behave the same regardless of code_only.
    """
    bar_parser = StringParser("bar", KeywordSegment)
    foo_parser = StringParser("foo", KeywordSegment)
    grammar = OneOf(foo_parser, bar_parser)
    with RootParseContext(dialect=None) as ctx:
        # This shouldn't match, but it *ALSO* shouldn't raise an exception.
        # https://github.com/sqlfluff/sqlfluff/issues/780
        assert not grammar.match(seg_list[5:], parse_context=ctx)
def test__parser__core_keyword(raw_seg_list):
    """Test the Mystical KeywordSegment."""
    # NOTE(review): another definition with this exact name appears later
    # in the file and shadows this one at import time — confirm which
    # version is intended to run.
    # First make a keyword parser, with a custom "bar" type.
    FooKeyword = StringParser("foo", KeywordSegment, type="bar")
    # Check the template looks as expected.
    assert FooKeyword.template.upper() == "FOO"
    with RootParseContext(dialect=None) as ctx:
        # It shouldn't match against the whole list...
        assert not FooKeyword.match(raw_seg_list, parse_context=ctx)
        # ...nor against the first element, bare or as a list.
        assert not FooKeyword.match(raw_seg_list[0], parse_context=ctx)
        assert not FooKeyword.match([raw_seg_list[0]], parse_context=ctx)
        # It should match the element at index 1 (returns a match object).
        m = FooKeyword.match(raw_seg_list[1], parse_context=ctx)
        assert m
        assert m.matched_segments[0].raw == "foo"
        assert isinstance(m.matched_segments[0], KeywordSegment)
        # It should also match that element as a list...
        assert FooKeyword.match([raw_seg_list[1]], parse_context=ctx)
        # ...and as a slice.
        assert FooKeyword.match(raw_seg_list[1:], parse_context=ctx)
        # Check that the types work right. Importantly that the "bar"
        # type makes it in.
        assert m.matched_segments[0].class_types == {"base", "keyword", "raw", "bar"}
def test__parser__grammar_oneof(seg_list, allow_gaps):
    """Test the OneOf grammar.

    NB: Should behave the same regardless of code_only.
    """
    bar_parser = StringParser("bar", KeywordSegment)
    foo_parser = StringParser("foo", KeywordSegment)
    grammar = OneOf(foo_parser, bar_parser, allow_gaps=allow_gaps)
    with RootParseContext(dialect=None) as ctx:
        # Check directly.
        match = grammar.match(seg_list, parse_context=ctx)
        assert match.matched_segments == (
            KeywordSegment("bar", seg_list[0].pos_marker),
        )
        # Check with a bit of whitespace.
        assert not grammar.match(seg_list[1:], parse_context=ctx)
def test__parser__grammar_greedyuntil_bracketed(bracket_seg_list, fresh_ansi_dialect):
    """Test the GreedyUntil grammar with brackets."""
    # NOTE(review): another definition with this exact name appears later
    # in the file (expecting a different match length) and shadows this
    # one at import time — confirm which version is intended to run.
    foo_parser = StringParser("foo", KeywordSegment)
    grammar = GreedyUntil(foo_parser)
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        # Check that we can make it past the brackets.
        assert len(grammar.match(bracket_seg_list, parse_context=ctx)) == 7
def test__parser__grammar__base__bracket_fail_with_open_paren_close_square_mismatch(
    generate_test_segments, fresh_ansi_dialect
):
    """Test _bracket_sensitive_look_ahead_match failure case.

    Should fail when the type of a close bracket doesn't match the type
    of the corresponding open bracket, but both are "definite" brackets.
    """
    foo_parser = StringParser("foo", KeywordSegment)
    # Bracket matching requires a dialect.
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        with pytest.raises(SQLParseError) as excinfo:
            BaseGrammar._bracket_sensitive_look_ahead_match(
                generate_test_segments(
                    [
                        "select",
                        " ",
                        "*",
                        " ",
                        "from",
                        "(",
                        "foo",
                        "]",  # Bracket types don't match (parens vs square)
                    ]
                ),
                [foo_parser],
                ctx,
            )
        assert excinfo.match("Found unexpected end bracket")
def test__parser__grammar__base__bracket_fail_with_unexpected_end_bracket(
    generate_test_segments, fresh_ansi_dialect
):
    """Test _bracket_sensitive_look_ahead_match edge case.

    Should fail gracefully and stop matching if we find a trailing unmatched.
    """
    foo_parser = StringParser("foo", KeywordSegment)
    # Bracket matching requires a dialect.
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        _, match, _ = BaseGrammar._bracket_sensitive_look_ahead_match(
            generate_test_segments(
                [
                    "bar",
                    "(",  # This bracket pair should be mutated
                    ")",
                    " ",
                    ")",  # This is the unmatched bracket
                    " ",
                    "foo",
                ]
            ),
            [foo_parser],
            ctx,
        )
        # No match, even though there's a "foo" at the end.
        assert not match
        # The first bracket pair should have been mutated into a
        # bracketed segment.
        segments = match.unmatched_segments
        assert segments[1].is_type("bracketed")
        assert segments[1].raw == "()"
        assert len(segments[1].segments) == 2
        # The trailing "foo" shouldn't have been mutated.
        assert segments[5].raw == "foo"
        assert not isinstance(segments[5], KeywordSegment)
def test__parser__grammar_anysetof(generate_test_segments):
    """Test the AnySetOf grammar."""
    tokens = ["bar", " \t ", "foo", " \t ", "bar"]
    segments = generate_test_segments(tokens)
    bar_parser = StringParser("bar", KeywordSegment)
    foo_parser = StringParser("foo", KeywordSegment)
    grammar = AnySetOf(foo_parser, bar_parser)
    with RootParseContext(dialect=None) as ctx:
        # Check directly.
        assert grammar.match(segments, parse_context=ctx).matched_segments == (
            KeywordSegment("bar", segments[0].pos_marker),
            WhitespaceSegment(" \t ", segments[1].pos_marker),
            KeywordSegment("foo", segments[2].pos_marker),
        )
        # Check with a bit of whitespace.
        assert not grammar.match(segments[1:], parse_context=ctx)
def test__parser__grammar__oneof__copy():
    """Test grammar copying."""
    bar_parser = StringParser("bar", KeywordSegment)
    foo_parser = StringParser("foo", KeywordSegment)
    original = OneOf(foo_parser, bar_parser)
    # A plain copy should be equal but not the same object.
    duplicate = original.copy()
    assert original == duplicate
    assert original is not duplicate
    # Copy with insertion at the start...
    assert original.copy(insert=[bar_parser], at=0) == OneOf(
        bar_parser, foo_parser, bar_parser
    )
    # ...in the middle...
    assert original.copy(insert=[bar_parser], at=1) == OneOf(
        foo_parser, bar_parser, bar_parser
    )
    # ...and at the end.
    assert original.copy(insert=[bar_parser], at=-1) == OneOf(
        foo_parser, bar_parser, bar_parser
    )
def test__parser__grammar_startswith_a(
    keyword, match_truthy, seg_list, fresh_ansi_dialect, caplog
):
    """Test the StartsWith grammar simply."""
    keyword_parser = StringParser(keyword, KeywordSegment)
    grammar = StartsWith(keyword_parser)
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            match = grammar.match(seg_list, parse_context=ctx)
            # The parametrised flag says whether this keyword should match.
            assert bool(match) is match_truthy
def test__parser__grammar_sequence_nested(seg_list, caplog):
    """Test the Sequence grammar when nested."""
    bar_parser = StringParser("bar", KeywordSegment)
    foo_parser = StringParser("foo", KeywordSegment)
    baar_parser = StringParser("baar", KeywordSegment)
    grammar = Sequence(Sequence(bar_parser, foo_parser), baar_parser)
    with RootParseContext(dialect=None) as ctx:
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            # Matching only the start of the list shouldn't work.
            logging.info("#### TEST 1")
            assert not grammar.match(seg_list[:2], parse_context=ctx)
            # Matching the whole list should, and the result should be flat.
            logging.info("#### TEST 2")
            assert grammar.match(seg_list, parse_context=ctx).matched_segments == (
                KeywordSegment("bar", seg_list[0].pos_marker),
                seg_list[1],  # This will be the whitespace segment
                KeywordSegment("foo", seg_list[2].pos_marker),
                KeywordSegment("baar", seg_list[3].pos_marker),
                # NB: No whitespace at the end, this shouldn't be consumed.
            )
def test__parser__grammar_greedyuntil(
    keyword, seg_list, enforce_ws, slice_len, fresh_ansi_dialect
):
    """Test the GreedyUntil grammar."""
    grammar = GreedyUntil(
        StringParser(keyword, KeywordSegment),
        enforce_whitespace_preceding_terminator=enforce_ws,
    )
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        # The match should consume everything up to the terminator,
        # i.e. the first slice_len segments.
        match = grammar.match(seg_list, parse_context=ctx)
        assert match.matched_segments == seg_list[:slice_len]
def test__parser__grammar__base__longest_trimmed_match__adv(seg_list, caplog):
    """Test the _longest_trimmed_match method of the BaseGrammar.

    Checks that the longest of several candidate matchers wins, and that
    ties are broken by taking the first of the equally-long matchers.
    """
    bs = StringParser("bar", KeywordSegment)
    fs = StringParser("foo", KeywordSegment)
    matchers = [
        bs,
        fs,
        Sequence(bs, fs),  # This should be the winner.
        OneOf(bs, fs),
        Sequence(bs, fs),  # Another to check we return the first
    ]
    with RootParseContext(dialect=None) as ctx:
        # Matching the first element of the list.
        # BUGFIX: the logger name was misspelled as "sqluff.parser", so
        # caplog was capturing the wrong logger and parser DEBUG output
        # was lost.
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            match, matcher = BaseGrammar._longest_trimmed_match(seg_list, matchers, ctx)
        # Check we got a match
        assert match
        # Check we got the right one.
        assert matcher is matchers[2]
        # And it matched the first three segments
        assert len(match) == 3
def test__parser__grammar_oneof_take_longest_match(seg_list):
    """Test that the OneOf grammar takes the longest match."""
    foo_regex = RegexParser(r"fo{2}", KeywordSegment)
    baar_parser = StringParser("baar", KeywordSegment)
    foo_parser = StringParser("foo", KeywordSegment)
    foo_baar_sequence = Sequence(
        foo_parser,
        baar_parser,
    )
    # Even if foo_regex comes first, foo_baar_sequence is a longer
    # match and should be taken.
    grammar = OneOf(foo_regex, foo_baar_sequence)
    with RootParseContext(dialect=None) as ctx:
        # Sanity check: the regex alone matches just the single keyword.
        assert foo_regex.match(seg_list[2:], parse_context=ctx).matched_segments == (
            KeywordSegment("foo", seg_list[2].pos_marker),
        )
        # The OneOf should prefer the longer sequence match.
        assert grammar.match(seg_list[2:], parse_context=ctx).matched_segments == (
            KeywordSegment("foo", seg_list[2].pos_marker),
            KeywordSegment("baar", seg_list[3].pos_marker),
        )
def test__parser__grammar_greedyuntil_bracketed(bracket_seg_list, fresh_ansi_dialect):
    """Test the GreedyUntil grammar with brackets."""
    foo_parser = StringParser("foo", KeywordSegment)
    grammar = GreedyUntil(foo_parser)
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        # Check that we can make it past the brackets.
        result = grammar.match(bracket_seg_list, parse_context=ctx)
        assert len(result) == 4
        # A bracketed segment should have been constructed.
        assert result.matched_segments[2].is_type("bracketed")
        assert result.matched_segments[2].raw == "(foo    )"
        # The unmatched remainder is the trailing foo AND the whitespace.
        assert len(result.unmatched_segments) == 2
def test__parser__grammar_delimited(
    min_delimiters,
    allow_gaps,
    allow_trailing,
    token_list,
    match_len,
    caplog,
    generate_test_segments,
    fresh_ansi_dialect,
):
    """Test the Delimited grammar when not code_only."""
    segments = generate_test_segments(token_list)
    grammar = Delimited(
        StringParser("bar", KeywordSegment),
        delimiter=StringParser(".", SymbolSegment, name="dot"),
        allow_gaps=allow_gaps,
        allow_trailing=allow_trailing,
        min_delimiters=min_delimiters,
    )
    with RootParseContext(dialect=fresh_ansi_dialect) as ctx:
        with caplog.at_level(logging.DEBUG, logger="sqlfluff.parser"):
            # Matching with whitespace shouldn't match if we need at
            # least one delimiter.
            match = grammar.match(segments, parse_context=ctx)
            assert len(match) == match_len
def test__parser__grammar_oneof_take_first(seg_list):
    """Test that the OneOf grammar takes first match in case they are of same length."""
    foo_regex = RegexParser(r"fo{2}", KeywordSegment)
    foo_parser = StringParser("foo", KeywordSegment)
    # Both parsers would match "foo", so we test that order matters.
    regex_first = OneOf(foo_regex, foo_parser)
    string_first = OneOf(foo_parser, foo_regex)
    with RootParseContext(dialect=None) as ctx:
        expected = (KeywordSegment("foo", seg_list[2].pos_marker),)
        assert (
            regex_first.match(seg_list[2:], parse_context=ctx).matched_segments
            == expected
        )
        assert (
            string_first.match(seg_list[2:], parse_context=ctx).matched_segments
            == expected
        )
def test__parser__grammar__base__look_ahead_match(
    seg_list_slice,
    matcher_keywords,
    result_slice,
    winning_matcher,
    pre_match_slice,
    seg_list,
):
    """Test the _look_ahead_match method of the BaseGrammar."""
    # Build a matcher for each of the keywords.
    matchers = [StringParser(kw, KeywordSegment) for kw in matcher_keywords]
    # Look up the expected winning matcher by its keyword.
    winning_matcher = matchers[matcher_keywords.index(winning_matcher)]
    with RootParseContext(dialect=None) as ctx:
        result = BaseGrammar._look_ahead_match(
            seg_list[seg_list_slice],
            matchers,
            ctx,
        )
        # The response should be a three-tuple.
        assert isinstance(result, tuple)
        assert len(result) == 3
        result_pre_match, result_match, result_matcher = result
        # Check the right matcher won.
        assert result_matcher == winning_matcher
        # Build the check tuple for the pre-match section.
        if pre_match_slice:
            expected_pre_match = seg_list[pre_match_slice]
        else:
            expected_pre_match = ()
        assert result_pre_match == expected_pre_match
        # Build and compare the expected match tuple.
        expected_result = make_result_tuple(
            result_slice=result_slice,
            matcher_keywords=matcher_keywords,
            seg_list=seg_list,
        )
        assert result_match.matched_segments == expected_result
def test__parser__core_ephemeral_segment(raw_seg_list):
    """Test the EphemeralSegment."""
    # NOTE(review): another definition with this exact name appears later
    # in the file (using the EphemeralSegment.make API) and shadows this
    # one at import time — confirm which version is intended to run.
    # First make a keyword parser to use as the parse grammar.
    bar_keyword = StringParser("bar", KeywordSegment)
    ephemeral_segment = EphemeralSegment(
        segments=raw_seg_list[:1],
        pos_marker=None,
        parse_grammar=bar_keyword,
        name="foobar",
    )
    with RootParseContext(dialect=None) as ctx:
        # Parsing should replace the ephemeral segment, so we shouldn't
        # get an EphemeralSegment back.
        parsed = ephemeral_segment.parse(ctx)
        assert isinstance(parsed, tuple)
        element = parsed[0]
        assert not isinstance(element, EphemeralSegment)
        assert isinstance(element, KeywordSegment)
def expand(self) -> "Dialect":
    """Expand any callable references to concrete ones.

    This must be called before using the dialect. But allows more
    flexible definitions to happen at runtime.

    NOTE: This method returns a copy of the current dialect so that
    we don't pollute the original dialect and get dependency issues.

    Returns:
        :obj:`Dialect`: a copy of the given dialect but with expanded
        references.

    Raises:
        ValueError: if the dialect has already been expanded.
    """
    # Guard against double-expansion.
    if self.expanded:
        raise ValueError("Attempted to re-expand an already expanded dialect.")
    new_dialect = self.copy_as(name=self.name)
    # Expand any callable elements of the dialect.
    for element_name in new_dialect._library:
        element = new_dialect._library[element_name]
        if isinstance(element, SegmentGenerator):
            # If the element is callable, call it passing the current
            # dialect and store the result in its place.
            # Use the .replace() method for its error handling.
            new_dialect.replace(**{element_name: element.expand(new_dialect)})
    # Expand any keyword sets (e.g. reserved_keywords: JOIN, ...),
    # making sure each value is available as a KeywordSegment parser.
    for keyword_set in ["unreserved_keywords", "reserved_keywords"]:
        for keyword in new_dialect.sets(keyword_set):
            segment_name = keyword.capitalize() + "KeywordSegment"
            if segment_name not in new_dialect._library:
                new_dialect._library[segment_name] = StringParser(
                    keyword.lower(), KeywordSegment
                )
    new_dialect.expanded = True
    return new_dialect
def test__parser__core_keyword(raw_seg_list):
    """Test the Mystical KeywordSegment."""
    # First make a keyword parser.
    FooKeyword = StringParser("foo", KeywordSegment)
    # Check the template looks as expected.
    assert FooKeyword.template == "FOO"
    with RootParseContext(dialect=None) as ctx:
        # It shouldn't match against the whole list...
        assert not FooKeyword.match(raw_seg_list, parse_context=ctx)
        # ...nor against the first element, bare or as a list.
        assert not FooKeyword.match(raw_seg_list[0], parse_context=ctx)
        assert not FooKeyword.match([raw_seg_list[0]], parse_context=ctx)
        # It should match the element at index 1 (returns a match object).
        m = FooKeyword.match(raw_seg_list[1], parse_context=ctx)
        assert m
        assert m.matched_segments[0].raw == "foo"
        assert isinstance(m.matched_segments[0], KeywordSegment)
        # It should also match that element as a list...
        assert FooKeyword.match([raw_seg_list[1]], parse_context=ctx)
        # ...and as a slice.
        assert FooKeyword.match(raw_seg_list[1:], parse_context=ctx)
def test__parser__grammar__base__longest_trimmed_match__basic(
    seg_list, seg_list_slice, matcher_keywords, trim_noncode, result_slice
):
    """Test the _longest_trimmed_match method of the BaseGrammar."""
    # Build a matcher for each of the keywords.
    matchers = [StringParser(kw, KeywordSegment) for kw in matcher_keywords]
    with RootParseContext(dialect=None) as ctx:
        match, _ = BaseGrammar._longest_trimmed_match(
            seg_list[seg_list_slice], matchers, ctx, trim_noncode=trim_noncode
        )
        # Build the expected result and compare.
        expected_result = make_result_tuple(
            result_slice=result_slice,
            matcher_keywords=matcher_keywords,
            seg_list=seg_list,
        )
        assert match.matched_segments == expected_result
def test__parser__core_ephemeral_segment(raw_seg_list):
    """Test the EphemeralSegment."""
    # First make a keyword parser to use as both grammars.
    bar_keyword = StringParser("bar", KeywordSegment)
    ephemeral_segment = EphemeralSegment.make(
        match_grammar=bar_keyword, parse_grammar=bar_keyword, name="foobar"
    )
    with RootParseContext(dialect=None) as ctx:
        # Test on a slice containing only the first element.
        match = ephemeral_segment.match(raw_seg_list[:1], parse_context=ctx)
        assert match
        # It should match as an instance of the ephemeral class...
        element = match.matched_segments[0]
        assert isinstance(element, ephemeral_segment)
        # ...but parsing should replace it, so we don't get an
        # ephemeral segment back.
        parsed = element.parse(ctx)
        assert isinstance(parsed, tuple)
        element = parsed[0]
        assert not isinstance(element, ephemeral_segment)
        assert isinstance(element, KeywordSegment)
FunctionScriptTerminatorSegment=NamedParser("function_script_terminator", CodeSegment, type="statement_terminator"), WalrusOperatorSegment=NamedParser("walrus_operator", SymbolSegment, type="assignment_operator"), VariableNameSegment=RegexParser( r"[A-Z][A-Z0-9_]*", CodeSegment, name="function_variable", type="variable", ), ) exasol_fs_dialect.replace(SemicolonSegment=StringParser(";", SymbolSegment, name="semicolon", type="semicolon"), ) @exasol_fs_dialect.segment(replace=True) class StatementSegment(BaseSegment): """A generic segment, to any of its child subsegments.""" type = "statement" match_grammar = GreedyUntil(Ref("FunctionScriptTerminatorSegment")) parse_grammar = OneOf( Ref("CreateFunctionStatementSegment"), Ref("CreateScriptingLuaScriptStatementSegment"), Ref("CreateUDFScriptStatementSegment"), Ref("CreateAdapterScriptStatementSegment"),
# https://cwiki.apache.org/confluence/display/hive/languagemanual+types#LanguageManualTypes-Intervals hive_dialect.sets("datetime_units").update([ "NANO", "NANOS", "SECONDS", "MINUTES", "HOURS", "DAYS", "WEEKS", "MONTHS", "YEARS", ]) hive_dialect.add( StartAngleBracketSegment=StringParser("<", SymbolSegment, name="start_angle_bracket", type="start_angle_bracket"), EndAngleBracketSegment=StringParser(">", SymbolSegment, name="end_angle_bracket", type="end_angle_bracket"), JsonfileKeywordSegment=StringParser("JSONFILE", KeywordSegment, name="json_file", type="file_format"), RcfileKeywordSegment=StringParser("RCFILE", KeywordSegment, name="rc_file", type="file_format"), SequencefileKeywordSegment=StringParser("SEQUENCEFILE", KeywordSegment,