def test__parser__grammar_oneof_take_longest_match(seg_list):
    """Check that OneOf prefers the longest of its candidate matches.

    A regex segment that only covers "foo" is listed ahead of a sequence
    covering "foo" followed by "baar". The sequence consumes more segments,
    so OneOf is expected to return the sequence's match despite the ordering.
    """
    foo_keyword = KeywordSegment.make("foo")
    baar_keyword = KeywordSegment.make("baar")
    foo_pattern = ReSegment.make(r"fo{2}")
    foo_then_baar = Sequence(foo_keyword, baar_keyword)

    # The shorter (regex) option is deliberately placed first: the longer
    # sequence match should still win.
    grammar = OneOf(foo_pattern, foo_then_baar)

    with RootParseContext(dialect=None) as ctx:
        # Sanity check: on its own, the regex segment matches just "foo".
        regex_only = foo_pattern.match(seg_list[2:], parse_context=ctx)
        assert regex_only.matched_segments == (
            foo_pattern("foo", seg_list[2].pos_marker),
        )

        # Through OneOf, the two-segment sequence is the one returned.
        longest = grammar.match(seg_list[2:], parse_context=ctx)
        assert longest.matched_segments == (
            foo_keyword("foo", seg_list[2].pos_marker),
            baar_keyword("baar", seg_list[3].pos_marker),
        )
def test__parser__grammar_oneof_take_first(seg_list):
    """Check that OneOf resolves equal-length matches by option order.

    A regex segment and a keyword segment are both given "foo" to match, so
    neither match is longer than the other. The option listed first in the
    OneOf is expected to be the one returned.
    """
    foo_keyword = KeywordSegment.make("foo")
    foo_pattern = ReSegment.make(r"fo{2}")

    # Same two options, opposite orderings.
    pattern_first = OneOf(foo_pattern, foo_keyword)
    keyword_first = OneOf(foo_keyword, foo_pattern)

    with RootParseContext(dialect=None) as ctx:
        cases = (
            (pattern_first, foo_pattern),
            (keyword_first, foo_keyword),
        )
        for grammar, expected_winner in cases:
            outcome = grammar.match(seg_list[2:], parse_context=ctx)
            assert outcome.matched_segments == (
                expected_winner("foo", seg_list[2].pos_marker),
            )
), ), ] + exasol_fs_dialect.get_lexer_struct() )

# Register additional segments on the dialect: a statement terminator, a
# walrus (assignment) operator, and a variable-name segment defined by regex.
exasol_fs_dialect.add(
    FunctionScriptTerminatorSegment=NamedSegment.make(
        "function_script_terminator", type="statement_terminator"
    ),
    WalrusOperatorSegment=NamedSegment.make(
        "walrus_operator", type="assignment_operator"
    ),
    VariableNameSegment=ReSegment.make(
        r"[A-Z][A-Z0-9_]*",
        name="function_variable",
        type="variable",
    ),
)

# Supply this dialect's own definition of the semicolon segment.
exasol_fs_dialect.replace(
    SemicolonSegment=SymbolSegment.make(";", name="semicolon", type="semicolon"),
)


# NOTE(review): `replace=True` presumably overrides a StatementSegment that
# the dialect already has -- confirm against the dialect `segment` decorator.
# The remainder of this class may continue beyond the visible chunk.
@exasol_fs_dialect.segment(replace=True)
class StatementSegment(BaseSegment):
    """A generic segment, to any of its child subsegments."""

    type = "statement"
"PIVOT", "SAMPLE", "TABLESAMPLE", "UNPIVOT", ])

# Register Snowflake-specific segments on the dialect.
snowflake_dialect.add(
    # In snowflake, these are case sensitive even though they're not quoted
    # so they need a different `name` and `type` so they're not picked up
    # by other rules.
    ParameterAssignerSegment=SymbolSegment.make("=>", name="parameter_assigner", type="parameter_assigner"),
    # NOTE(review): the `*` quantifier means this pattern can also match the
    # empty string -- confirm that is intended (vs. `+`).
    NakedSemiStructuredElementSegment=ReSegment.make(
        r"[A-Z0-9_]*",
        name="naked_semi_structured_element",
        type="semi_structured_element",
    ),
    # Shares the "semi_structured_element" type with the naked variant above,
    # but is built from the "double_quote" named segment.
    QuotedSemiStructuredElementSegment=NamedSegment.make(
        "double_quote",
        name="quoted_semi_structured_element",
        type="semi_structured_element",
    ),
    # A `$` followed by one or more digits, e.g. `$1`.
    ColumnIndexIdentifierSegment=ReSegment.make(
        r"\$[0-9]+", name="column_index_identifier_segment", type="identifier"),
)

# Replace grammar entries on the dialect (the call continues beyond the
# visible chunk).
snowflake_dialect.replace(
    Accessor_Grammar=AnyNumberOf(
        Ref("ArrayAccessorSegment"),
Sequence("FOR", "SYSTEM_TIME", "AS", "OF", Ref("ExpressionSegment"), optional=True),
Sequence("WITH", "OFFSET", "AS", Ref("SingleIdentifierGrammar"), optional=True),
),
FunctionNameSegment=ReSegment.make(
    # In BigQuery struct() has a special syntax, so we don't treat it as a function
    # NOTE(review): `_anti_template` presumably rejects names matching STRUCT;
    # confirm against ReSegment.
    r"[A-Z][A-Z0-9_]*",
    name="function_name",
    type="function_name",
    _anti_template=r"STRUCT",
),
# These two keyword arguments pass through names defined outside the
# visible chunk.
SelectTargetElementSegment=SelectTargetElementSegment,
SelectClauseSegment=SelectClauseSegment,
)


# NOTE(review): `replace=True` presumably overrides an existing
# FunctionDefinitionGrammar on the dialect -- confirm. The grammar below
# continues beyond the visible chunk.
@bigquery_dialect.segment(replace=True)
class FunctionDefinitionGrammar(BaseSegment):
    """This is the body of a `CREATE FUNCTION AS` statement."""

    match_grammar = Sequence(
        AnyNumberOf(
            Sequence(