def __init__(self):
    """Assemble the partition-filter expression grammar and an empty cache.

    The finished pyparsing element is stored on ``self._expr`` (named
    "expr"); ``self._cache`` starts out as an empty dict annotated as
    mapping ``str`` to ``_Expr``.
    """
    # build grammar according to Glue.Client.get_partitions(Expression)
    lpar, rpar = (Suppress(paren) for paren in "()")

    def or_parenthesized(target, core):
        """Bind ``target`` to ``core`` or to ``target`` itself wrapped in parens."""
        target <<= core | lpar + target + rpar
        return target

    # NOTE these are AWS Athena column name best practices
    ident = or_parenthesized(
        Forward().set_name("ident"),
        Word(alphanums + "._").set_parse_action(_Ident),
    )
    number = or_parenthesized(
        Forward().set_name("number"),
        pyparsing_common.number,
    )
    string = or_parenthesized(
        Forward().set_name("string"),
        QuotedString(quote_char="'", esc_quote="''"),
    )

    literal = (number | string).set_name("literal")
    literal_list = delimited_list(literal, min=1).set_name("list")

    bin_op = one_of("<> >= <= > < =").set_name("binary op")

    # the boolean connectives may themselves be wrapped in parentheses
    and_ = or_parenthesized(Forward(), CaselessKeyword("and"))
    or_ = or_parenthesized(Forward(), CaselessKeyword("or"))

    in_, between, like, not_, is_, null = (
        CaselessKeyword(word) for word in "in between like not is null".split()
    )
    not_ = Suppress(not_)  # only needed for matching

    # One named element per supported condition; each positive form is
    # immediately followed by its negated twin.
    is_null = (ident + is_ + null).set_parse_action(_IsNull)
    is_not_null = (ident + is_ + not_ + null).set_parse_action(_IsNotNull)
    comparison = (ident + bin_op + literal).set_parse_action(_BinOp)
    like_cond = (ident + like + string).set_parse_action(_Like)
    not_like_cond = (ident + not_ + like + string).set_parse_action(_NotLike)
    in_cond = (ident + in_ + lpar + literal_list + rpar).set_parse_action(_In)
    not_in_cond = (
        ident + not_ + in_ + lpar + literal_list + rpar
    ).set_parse_action(_NotIn)
    between_cond = (
        ident + between + literal + and_ + literal
    ).set_parse_action(_Between)
    not_between_cond = (
        ident + not_ + between + literal + and_ + literal
    ).set_parse_action(_NotBetween)

    # alternatives are tried left to right, so this order is significant
    cond = (
        is_null
        | is_not_null
        | comparison
        | like_cond
        | not_like_cond
        | in_cond
        | not_in_cond
        | between_cond
        | not_between_cond
    ).set_name("cond")

    # conditions can be joined using 2-ary AND and/or OR
    # (infix_notation takes its operator levels highest precedence first)
    boolean_levels = [
        (and_, 2, OpAssoc.LEFT, _BoolAnd),
        (or_, 2, OpAssoc.LEFT, _BoolOr),
    ]
    expr = infix_notation(cond, boolean_levels)

    self._expr = expr.set_name("expr")
    self._cache: Dict[str, _Expr] = {}
# Single term, either field specific or not TERM = (FIELD + OPERATOR + (RANGE | STRING)) | STRING # Multi term with or without operator QUERY = Optional( infix_notation( TERM, [ ( NOT, 1, OpAssoc.RIGHT, ), ( AND, 2, OpAssoc.LEFT, ), ( OR, 2, OpAssoc.LEFT, ), ], )) # Helper parsing objects class RegexExpr: def __init__(self, tokens):
def: 𒁴𒈫 """ # uncomment to show parse-time debugging # pp.enable_diag(pp.Diagnostics.enable_debug_on_named_expressions) # define a MINIMAL Python parser LPAR, RPAR, COLON, EQ = map(pp.Suppress, "():=") def_ = pp.Keyword("𒁴𒈫", ident_chars=Cuneiform.identbodychars).set_name("def") any_keyword = def_ ident = (~any_keyword) + pp.Word( Cuneiform.identchars, Cuneiform.identbodychars, asKeyword=True) str_expr = pp.infix_notation( pp.QuotedString('"') | pp.common.integer, [ ("*", 2, pp.OpAssoc.LEFT), ("+", 2, pp.OpAssoc.LEFT), ], ) rvalue = pp.Forward() fn_call = (ident + pp.Group(LPAR + pp.Optional(rvalue) + RPAR)).set_name("fn_call") rvalue <<= fn_call | ident | str_expr | pp.common.number assignment_stmt = ident + EQ + rvalue stmt = pp.Group(fn_call | assignment_stmt).set_name("stmt") fn_def = pp.Group(def_ + ident + pp.Group(LPAR + pp.Optional(rvalue) + RPAR) + COLON).set_name("fn_def")
street_address = pp.Group( integer("house_number") + word[1, ...]("street_name")).setName("street_address") time = pp.Regex(r"\d\d:\d\d") grammar = ( pp.Group(integer[1, ...]) + (ppc.ipv4_address & word("header_word") & pp.Optional(time)).setName("header with various elements")("header") + street_address("address") + pp.Group(pp.counted_array(word)) + pp.Group(integer * 8)("data") + pp.Group(pp.Word("abc") + pp.Word("def") * 3) + pp.infix_notation(integer, [ (plus_minus().setName("pos_neg"), 1, pp.opAssoc.RIGHT), (mult_div, 2, pp.opAssoc.LEFT), (plus_minus, 2, pp.opAssoc.LEFT), ]).setName("simple_arithmetic") + ... + pp.Group(ppc.ipv4_address)("ip_address")).setName("grammar") grammar.create_diagram("railroad_diagram_demo.html", vertical=6, show_results_names=True) test = """\ 1 2 3 ABC 1.2.3.4 12:45 123 Main St 4 abc def ghi jkl 5 5 5 5 5 5 5 5