def main(s, out_fn):
    """Parse ``s`` with the Mapfile LALR grammar and render the tree as a PNG.

    The grammar is read from ``<project_root>/mappyfile/mapfile.lalr.g``,
    where the project root is two directories above this file. The raw tree
    and its pretty-printed form are printed, and the image is written to
    ``<project_root>/docs/images/<out_fn>``.

    :param s: text to parse with the grammar
    :param out_fn: file name of the PNG created inside ``docs/images``
    """
    graphviz_setup()

    project_root = os.path.normpath(
        os.path.join(os.path.dirname(__file__), "../../"))
    # Join path components instead of concatenating strings: normpath() drops
    # the trailing separator, so ``project_root + "./mappyfile"`` produced
    # ``<root>./mappyfile`` rather than ``<root>/mappyfile``.
    grammar_path = os.path.join(project_root, "mappyfile", "mapfile.lalr.g")

    # Context manager so the grammar file handle is closed deterministically.
    with open(grammar_path) as grammar_file:
        grammar_text = grammar_file.read()

    g = Lark(grammar_text, parser="lalr", lexer="contextual")
    t = g.parse(s)
    print(t)
    pydot__tree_to_png(t, os.path.join(project_root, "docs/images", out_fn))
    print(t.pretty())
class QueryParser(object):
    """Thin wrapper around a Lark LALR parser built from the module grammar."""

    def __init__(self, flatten_tfidf=False, use_kwargs=False):
        # ``flatten_tfidf`` is accepted but neither stored nor used here.
        self.parser = Lark(grammar, parser='lalr')
        self.use_kwargs = use_kwargs

    def parse(self, input):
        """Parse a query string and return the resulting parse tree.

        The tree is returned as-is: no visitor is run over it and nothing is
        cached on the instance.
        """
        return self._get_tree(input)

    def _get_tree(self, input):
        """Normalise ``input`` with ``_cleanup`` and hand it to the parser."""
        cleaned = self._cleanup(input)
        return self.parser.parse(cleaned)

    def get_tree(self, input, destination=None):
        """Return an indented text rendering of the parse tree.

        When ``destination`` is truthy the tree is additionally rendered to
        that path as a PNG via ``pydot__tree_to_png``.
        """
        tree = self._get_tree(input)
        rendered = tree.pretty(indent_str=' ')
        if destination:
            pydot__tree_to_png(tree, destination)
        return rendered

    def _cleanup(self, text):
        """Pull a ``)`` that starts a new line up onto the preceding line."""
        return re.sub(r'\n\s*\)', ")", text, flags=re.MULTILINE)
def parse_text(text):
    """Parse free text into a graph using the word list and grammar files.

    Steps:
      1. Load ``../known_words.txt`` (relative to this file). Lines without
         a comma are known words; lines with a comma map the text before
         the first ``', '`` to the text after the last ``', '``.
      2. Lower-case and tokenize ``text``, apply that mapping, and pass the
         terms through ``find_brackets``.
      3. Merge every run of consecutive unknown terms into one
         space-separated term, then replace each unknown term with an
         ``ENTITY<i>`` placeholder, ``i`` being its position in the list.
      4. Parse the placeholder sentence with ``../grammar.txt``
         (``start='sentence'``, ``ambiguity='explicit'``) and build the
         graph with ``parse_tree`` and ``post_proc_graph``.

    :param text: input sentence
    :return: the graph returned by ``post_proc_graph``
    """
    file_dir = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(file_dir, '..', 'known_words.txt')) as words_file:
        lines = words_file.read().split('\n')

    # A set gives O(1) membership tests; ``words`` is probed once per term
    # in three separate passes below (a list made each probe O(n)).
    words = {line for line in lines if ',' not in line}
    word_map = {}
    for line in lines:
        if ',' in line:
            parts = line.split(', ')
            word_map[parts[0]] = parts[-1]

    text = text.lower()
    original_terms = [word_map.get(token, token) for token in tokenize(text)]
    bracketed_terms = find_brackets(original_terms)

    # collapse unknown terms
    collapsed_terms = []
    joint_term = []
    for t in bracketed_terms:
        if t in words:
            # A known word ends the current run of unknown terms.
            if joint_term:
                collapsed_terms.append(' '.join(joint_term))
                joint_term = []
            collapsed_terms.append(t)
        else:
            joint_term.append(t)
    if joint_term:
        collapsed_terms.append(' '.join(joint_term))

    term_map = {
        f'ENTITY{i}': w
        for i, w in enumerate(collapsed_terms)
        if w not in words
    }
    substituted_terms = [
        w if w in words else f'ENTITY{i}'
        for i, w in enumerate(collapsed_terms)
    ]
    text = ' '.join(substituted_terms)

    with open(os.path.join(file_dir, '..', 'grammar.txt')) as grammar_file:
        grammar = grammar_file.read()
    # Explicit ambiguity in parse tree!
    parser = Lark(grammar, start='sentence', ambiguity='explicit')
    tree = parser.parse(text)
    graph = parse_tree(tree)
    graph = post_proc_graph(graph, term_map)
    return graph
def compile():
    """Build an INI-style parser and return a callable that parses a string.

    The returned callable appends a newline to its argument and parses it
    with an LALR Lark parser whose transformer is ``TreeToDict``.
    """
    class TreeToDict(Transformer):
        # Callbacks are looked up by rule name, so the attribute names below
        # mirror the rule and alias names of the grammar.

        # `start` receives the transformed sections and builds a dict.
        start = dict

        @v_args(inline=True)
        def section(self, header, *pairs):
            # header[1:-1] drops the first and last character of the header
            # (the brackets matched by the `header` regexp).
            return (header[1:-1], dict(pairs))

        @v_args(inline=True)
        def stripped(self, s=''):
            # The default covers the empty alternative of `value`.
            return str.strip(s)

        # `stripped` must already be defined at this point in the class body.
        value = key = stripped
        header = v_args(inline=True)(str)
        keyval = tuple
        # A key without "=" becomes the pair (key, None).
        bare_key = v_args(inline=True)(lambda self, key: (key, None))

    parser = Lark(
        r"""
        start: _NL* section*
        section: header _NL+ (keyval _NL+)*
        keyval: key "=" value
              | key -> bare_key
        header: /\[[^\n\r\]]+\]/
        key: /[^\n\r=]+/
        value: /[^\n\r]+/
             |
        COMMENT: ";" /[^\n]/* _NL
        %import common.NEWLINE -> _NL
        %import common.WS_INLINE
        %ignore WS_INLINE
        %ignore COMMENT
        """,
        parser="lalr",
        transformer=TreeToDict(),
    )
    # The appended newline lets the last line satisfy the `_NL+` in the grammar.
    return lambda s: parser.parse(s + '\n')
def test_aliases(self):
    """Both aliased alternatives of ``start`` must reach their callbacks."""
    # Mutable record of which alias callbacks have run.
    visited = {'ambiguous': False, 'full': False}

    class CustomTransformer(TreeForestTransformer):
        @handles_ambiguity
        def start(self, data):
            for tree in data:
                assert tree.data in ('ambiguous', 'full')

        def ambiguous(self, data):
            visited['ambiguous'] = True
            assert len(data) == 3
            assert [child.data for child in data] == ['ab', 'bc', 'cd']
            return self.tree_class('ambiguous', data)

        def full(self, data):
            visited['full'] = True
            assert len(data) == 1
            assert data[0].data == 'abcd'
            return self.tree_class('full', data)

    grammar = """
    start: ab bc cd -> ambiguous
        | abcd -> full
    !ab: "A" "B"?
    !bc: "B"? "C"?
    !cd: "C"? "D"
    !abcd: "ABCD"
    """

    parser = Lark(grammar, parser='earley', ambiguity='forest')
    forest = parser.parse('ABCD')
    CustomTransformer(resolve_ambiguity=False).transform(forest)
    self.assertTrue(visited['ambiguous'])
    self.assertTrue(visited['full'])
def parse(name_file):
    """Read a model file, extract its ``optimize:(...)`` directive, parse the rest.

    The directive is searched case-insensitively after ``#`` comments have
    been removed. Its ``;``-separated entries are stored in
    ``config.optimize``; ``config.opt_len`` is their count, minus one when
    ``'time'`` is among them. The directive is then removed and the
    remaining text is parsed with ``calc_grammar`` (the transformer runs
    during parsing). A parse error is printed, not raised.

    :param name_file: path of the file to read
    :return: ``config.optimize``
    :raises SystemExit: when no ``optimize:(...)`` directive is present
    """
    calc_parser = Lark(calc_grammar, parser='lalr', debug=True,
                       transformer=trans())
    with open(name_file, 'r') as myfile:
        file_content = myfile.read()

    # Raw string: "\(" in a normal literal is an invalid escape sequence.
    opt = re.search(r'optimize:\((.*)\)',
                    re.sub('#.*', '', file_content),
                    re.IGNORECASE)
    if not opt:
        exit("nothing to optimize")

    config.optimize = opt.group(1).split(';')
    target_count = len(config.optimize)
    config.opt_len = (target_count - 1 if 'time' in config.optimize
                      else target_count)

    # Remove the directive with the same case-insensitivity used to find it;
    # otherwise e.g. "Optimize:(...)" was detected above but left in the text
    # handed to the parser.
    file_content = re.sub(r'optimize:(.*)', '', file_content,
                          flags=re.IGNORECASE)
    try:
        # The result is not needed here; the transformer passed to Lark is
        # applied while parsing.
        calc_parser.parse(file_content)
    except UnexpectedInput as e:
        # Best effort, as before: report the problem and still return.
        print(e)
    return (config.optimize)
def parse(text: str) -> StateGraph:
    """Parse given source code text into a state graph.

    Comments are extracted from ``text`` (as ``text/x-c``). Every comment
    line that starts with ``@`` once surrounding spaces and asterisks are
    stripped is parsed with ``grammar.lark`` and transformed into a
    definition. The definitions are resolved and turned into the graph.

    :raises ParseError: for unexpected input reported at a column greater
        than 1, or for an unexpected end of a line.
    """
    grammar_file = Path(__file__).parent / 'grammar.lark'
    parser = Lark(grammar_file.read_text())
    transformer = GrammarTransformer()

    definitions = []
    for comment in extract_comments_from_str(text, mime='text/x-c'):
        stripped_lines = [raw.strip(' *') for raw in comment.text().split('\n')]
        # Number the lines starting from the comment's own line number.
        numbered = enumerate(stripped_lines, start=comment.line_number())
        for lineno, line in numbered:
            if not line.startswith('@'):
                continue
            try:
                definitions.append(transformer.transform(parser.parse(line)))
            except (UnexpectedCharacters, UnexpectedToken) as ex:
                # Errors reported at column 1 or less are ignored.
                if ex.column > 1:
                    if ex.args:
                        message = ex.args[0].split('at')[0]
                    else:
                        message = 'Unexpected input'
                    raise ParseError(message=message, line=lineno)
            except UnexpectedEOF:
                raise ParseError(message='Unexpected end', line=lineno)

    Resolvable.resolve_all(definitions)
    return StateGraph.of(definitions)
def partb(txt):
    """
    https://www.reddit.com/r/adventofcode/comments/3xflz8/day_19_solutions/cy4p1td?utm_source=share&utm_medium=web2x&context=3
    "this is actually the production rules for an unambiguous grammar"

    Builds a Lark grammar from the replacement rules, parses the medicine
    with start rule ``e`` and returns ``node_count - leaf_count`` of the tree.
    """
    rules_text, medicine = txt.split("\n\n")
    replacements = {
        source: [split_molecule_into_atoms(target) for target in targets]
        for source, targets in parse_replacements(rules_text).items()
    }

    # Every atom seen on either side of a replacement.
    all_atoms = set()
    for from_atom, to_molecules in replacements.items():
        all_atoms.add(from_atom)
        for molecule in to_molecules:
            all_atoms.update(molecule)

    # convert replacements into a grammar (for Lark)
    rules = []
    for atom in all_atoms:
        alternatives = [
            " ".join(part.lower() for part in molecule)  # rule names must be lowercase
            for molecule in replacements.get(atom, ())
        ]
        if atom != "e":
            alternatives.append(f'"{atom}"')  # add terminal
        body = " | ".join(alternatives)
        rules.append(f"{atom.lower()}: {body}")

    # parse the medicine string using the grammar
    tree = Lark("\n".join(rules), start="e").parse(medicine)

    # the number of transforms is the number of non-leaf nodes
    return node_count(tree) - leaf_count(tree)
def parse_file(file_name):
    """Read a cmoji source file, translate its emojis and parse it.

    The file's lines are concatenated without a separator. Each key of
    ``emoji_table`` is replaced by its value, tabs and newlines become
    spaces, and the result is parsed with a Lark parser built from
    ``RULES``.

    :param file_name: path of the source file
    :return: the parse tree
    :raises Exception: when parsing fails; the translated source and a hint
        are printed first and the original error is chained as the cause.
    """
    larker = Lark(RULES)

    # Emoji sources are not ASCII, so read them as UTF-8 instead of relying
    # on the platform's default encoding.
    with open(file_name, 'r', encoding='utf-8') as f:
        # Same result as appending the lines one by one (newlines dropped,
        # nothing inserted) without the quadratic `+=` string build.
        parse_string = f.read().replace('\n', '')

    # Pre-processes the cmoji file, replacing emojis with their parse equivalents
    for emoji, replacement in emoji_table.items():
        parse_string = parse_string.replace(emoji, replacement)
    # Replacement values may contain tabs or newlines, so normalise afterwards.
    parse_string = parse_string.replace('\t', ' ').replace('\n', ' ')

    try:
        tree = larker.parse(parse_string)
    except Exception as exc:
        print(parse_string)
        print('Needs more emoji\'s, mate')
        # Keep the underlying error attached instead of discarding it.
        raise Exception('could not parse {!r}'.format(file_name)) from exc
    return tree
def compile(self, pattern, ptr):
    """Expand ``pattern`` into one rule string per combination of values.

    Placeholder expressions are pulled out of ``pattern`` with the
    ``BehaviorTemplate`` regex, each is parsed with this object's grammar and
    compiled via ``_compile`` into a collection of values. The cartesian
    product of those collections is substituted back into the pattern.

    :return: list of substituted pattern strings
    """
    parser = Lark(self.grammar, start=self.start)
    tstring = BehaviorTemplate(pattern)

    # Keep matches whose first group is empty; take the second group, or the
    # third when the second is empty.
    result = [
        groups[1] or groups[2]
        for groups in re.findall(tstring.pattern, tstring.template)
        if not groups[0]
    ]

    listofvalues = [self._compile(parser.parse(expr), ptr) for expr in result]
    listoftuples = list(itertools.product(*listofvalues))

    # "enriquez sanitization"(TM)
    # Swap each expression (first occurrence only) for a plain identifier.
    for i, original in enumerate(result):
        placeholder = "thisSUB" + str(i)
        pattern = pattern.replace(original, placeholder, 1)
        result[i] = placeholder

    tstring = BehaviorTemplate(pattern)
    return [
        tstring.safe_substitute(dict(zip(result, values)))
        for values in listoftuples
    ]
def _parse_and_annotate(sentences, grammar_text):
    """Parse every sentence and pair its tokens with NER tags.

    Each sentence is parsed as given with an Earley parser built from
    ``grammar_text``; its lower-cased form is split into tokens, and the
    tokens are zipped with the tags from ``_create_ner_tags``.

    :param sentences: sized iterable of sentence strings
    :param grammar_text: Lark grammar source
    :return: one list of ``(token, tag)`` tuples per sentence
    """
    parser = Lark(grammar_text, parser="earley", lexer="standard",
                  propagate_positions=True)

    def annotate(sentence):
        # The parser sees the original casing; the tokenizer the lower-cased text.
        parse_tree = parser.parse(sentence)
        tokens = _tokenizer.split_words(sentence=sentence.lower())
        return list(zip(tokens, _create_ner_tags(tokens, parse_tree)))

    progress = tqdm(sentences,
                    desc="Parsing and creating sentence annotations",
                    total=len(sentences))
    return [annotate(sentence) for sentence in progress]
def bacula_parse(daemon="bareos-dir", hn=False):
    """Parse the preprocessed configuration of ``daemon``.

    :param daemon: passed through to ``preprocess_config``
    :param hn: passed through to ``preprocess_config``
    :return: the parse tree transformed by ``MyTransformer``, or ``None``
        when ``preprocess_config`` returns something falsy.
    """
    # Parse the preprocessed config with lark-parser
    config_grammar = r"""
        ?value: resources
              | resource
              | directive
              | string
        string : ESCAPED_STRING
        resource : (string "{" "\n" (directive|resource)* "}" "\n")
        resources : resource*
        directive : string " " "=" " " string "\n"

        %import common.ESCAPED_STRING
        %import common.WORD
        %import common.WS
        """
    parser = Lark(config_grammar, start='value')

    config = preprocess_config(daemon, hn)
    if not config:
        return None
    return MyTransformer().transform(parser.parse(config))
def s_expression_to_il(library_file):
    """
    Convert an s-expression file to an internal representation

    :param library_file: A file descriptor
    :return: a ``LibraryIL`` whose ``symbols`` maps each symbol's name to the
        result of ``symbol_to_il``
    """
    # Get paths relative to the location of this file, not the root of the module
    here = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(here, "s_expression.lark")) as grammar:
        grammar_text = grammar.read() + "\n"

    parser = Lark(
        grammar_text,
        parser="lalr",
        transformer=SExpressionTransformer(),
        start="list",
    )
    parsed = parser.parse(library_file.read())

    # Only children named "symbol" are converted.
    symbols = [node for node in parsed.children if node.name == "symbol"]

    res = LibraryIL()
    for symbol_il in map(symbol_to_il, symbols):
        res.symbols[symbol_il.name] = symbol_il
    return res
def compile(self, program):
    """Compile ``program`` and return the annotated listing as one string.

    The source (with ``os.linesep`` appended) is parsed with an LALR parser
    using ``TreeIndenter`` as post-lexer. An ``InstBuilder`` visits the tree
    and its ``program`` and ``labels`` are stored on this object before
    labels are resolved and the result is validated.

    :raises MipsSyntaxError: on an unexpected token or character
    :return: one ``<line> // <index>: <description>`` row per entry of
        ``self.final_program``, joined by newlines
    """
    parser = Lark(grammar, start='root', parser='lalr', postlex=TreeIndenter())
    syntax_errors = (
        lark_exceptions.UnexpectedToken,
        lark_exceptions.UnexpectedCharacters,
    )
    try:
        tree = parser.parse(program + os.linesep)
    except syntax_errors as exc:
        raise MipsSyntaxError(exc)

    if self.debug:
        print(tree)

    builder = InstBuilder()
    builder.visit(tree)
    self.program = builder.program
    self.labels = builder.labels
    self.resolve_labels()
    self.validate()

    rows = []
    for i, (line, desc) in enumerate(self.final_program):
        rows.append(f'{line:20} // {i:2}: {desc}')
    return "\n".join(rows)
def __init__(self, spec_path: str):
    """Load relation declarations and type names from a spec file.

    A line starting with ``#include`` is replaced by the lines of the named
    file, looked up relative to the directory of ``spec_path`` (includes are
    expanded one level only). Afterwards:

    * ``.decl`` lines are parsed and stored in ``self.decls`` as
      ``name -> [(first NAME, second NAME), ...]``, one pair per argument;
    * ``.number_type`` / ``.symbol_type`` lines add their second
      whitespace-separated token to ``self.number_types`` /
      ``self.symbol_types``.

    :param spec_path: path of the top-level spec file
    """
    decl_parser = Lark(syntax)  # type: ignore
    self.decls = {}
    self.number_types = {"number"}
    self.symbol_types = {"symbol"}

    spec_dir = os.path.dirname(spec_path)
    lines = []
    with open(spec_path, "r") as spec_f:
        for line in spec_f.read().split("\n"):
            if line.startswith("#include"):
                included = line.split()[1].strip("\"")
                # os.path.join keeps the include relative when spec_path has
                # no directory part; ``dirname + "/" + included`` turned it
                # into "/<included>". The `with` also closes the handle.
                with open(os.path.join(spec_dir, included), "r") as included_f:
                    lines.extend(included_f.read().split("\n"))
            else:
                lines.append(line)

    for line in lines:
        if line.startswith(".decl"):
            tree = decl_parser.parse(line)
            # Sanity checks on the shape of the parse tree.
            assert tree.data == "decl"
            assert tree.children[0].type == "NAME"  # type: ignore
            decl_name = tree.children[0].value  # type: ignore
            assert tree.children[1].data == "arguments"  # type: ignore
            args = []
            for arg in tree.children[1].children:  # type: ignore
                assert arg.data == "argvalue"  # type: ignore
                assert arg.children[0].type == "NAME"  # type: ignore
                assert arg.children[1].type == "NAME"  # type: ignore
                args.append((arg.children[0].value, arg.children[1].value))  # type: ignore
            self.decls[decl_name] = args
        elif line.startswith(".number_type"):
            self.number_types.add(line.split()[1])
        elif line.startswith(".symbol_type"):
            self.symbol_types.add(line.split()[1])
def parse_line(line):
    """Parse one ``<NT> <expr>`` line and return the Lark parse tree.

    The grammar is compiled anew on every call.
    """
    production_grammar = """
    start: NT " " expr
    expr: NT | T | ready_expr | star_expr | or_expr | set_expr
    NT: ("A".."Z") ("0".."9")*
    T: "eps" | ("a".."z") ("0".."9")*
    star_expr: NT "*" | T "*" | "(" ready_expr ")" "*" | "(" expr ")" "*"
    or_expr: expr " | " expr | "(" expr " | " expr ")"
    set_expr: (expr " ")+ expr
    ready_expr: ((NT|T) " ")+ (NT|T)
    """
    return Lark(production_grammar).parse(line)
class Parser:
    """LALR parser for the grammar stored in ``grammar.txt`` next to this file."""

    def __init__(self):
        self._grammar = self._load_grammar()
        self._parser = Lark(
            self._grammar,
            parser='lalr',
            propagate_positions=True,
        )

    def parse(self, contents, print_tree=False):
        """Parse ``contents`` and return the result of ``AstTransformer``.

        With ``print_tree`` set, the pretty-printed parse tree is printed
        between two marker lines first.
        """
        tree = self._parser.parse(contents)
        if print_tree:
            print('--------AST----------')
            print(tree.pretty())
            print('---------------------')
        transformer = AstTransformer()
        return transformer.transform(tree)

    def parse_file(self, path, print_tree=False):
        """Read the file at ``path`` and parse its contents."""
        with open(path) as source:
            contents = source.read()
            return self.parse(contents, print_tree=print_tree)

    def _load_grammar(self):
        """Return the text of ``grammar.txt`` located beside this module."""
        module_dir = os.path.dirname(__file__)
        with open(os.path.join(module_dir, "grammar.txt")) as grammar_file:
            grammar_text = grammar_file.read()
        return grammar_text
def main():
    """Parse a small sum-of-products sample with transformer ``CG`` and print the result."""
    grammar = r"""
    s: e "+" s -> s_f
     | e -> s_f
    e: INT "*" e -> e_f
     | INT -> e_f
    INT: "i"
    DELIM: /[ \r\t\n\f]/
    WS: (DELIM)+
    %ignore WS
    """
    code = """
    i * i + i * i
    """

    # The transformer is applied while parsing, so `result` is its output.
    parser = Lark(grammar, start="s", transformer=CG(), parser='lalr',
                  debug=False)
    result = parser.parse(code)
    print('***')
    print(result)
def compile():
    """Build an arithmetic evaluator and return a callable that evaluates a string.

    The returned callable appends ``';'`` to its argument (the grammar's
    ``start`` rule ends with ``";"``) and parses it with an LALR parser whose
    transformer, ``TreeToResult``, is applied during parsing.
    """
    arithmetic_grammar = r'''
        ?start: expr ";"

        ?expr: term
             | expr "+" term -> add
             | expr "-" term -> sub

        ?term: factor
             | term "*" factor -> mul
             | term "/" factor -> div

        ?factor: "-" factor -> neg
               | "+" factor
               | INTEGER -> number
               | "(" expr ")"

        INTEGER: "0" | "1".."9" INT?

        %import common.INT
        %import common.WS_INLINE
        %ignore WS_INLINE
    '''

    @v_args(inline=True)
    class TreeToResult(Transformer):
        # Importing inside the class body makes these functions class
        # attributes whose names match the grammar aliases add, sub, mul,
        # div and neg ("/" therefore maps to true division).
        from operator import add, sub, mul, truediv as div, neg

        def number(self, n):
            # `n` is the INTEGER token; convert its text to an int.
            return int(n.value)

    arithmetic_parser = Lark(arithmetic_grammar, parser='lalr', lexer='basic', propagate_positions=False, maybe_placeholders=False, transformer=TreeToResult(), _plugins=lark_cython.plugins)
    return lambda s: arithmetic_parser.parse(s + ';')
return children[0][1:-1] def CNAME(self, children): return children[0][1:-1] test_program = """ { (On, 10), (Off, 20), loop 3: { (On, 1), (Off, 2) } } """ def loadBSL(program): """Parses a Burnlight Scheduling Language string into a Program""" parser = Lark(schedule_grammar) return ProgramTransformer().transform(parser.parse(program)) if __name__ == '__main__': parser = Lark(schedule_grammar) tree = parser.parse(test_program) print(tree.pretty()) program = ProgramTransformer().transform(tree) print(program)
action="store_true", help="verbose output") ap.add_argument("--version", action="version", version="%(prog)s 1.0") args = ap.parse_args() logging.basicConfig(format="[%(levelname)s] %(message)s", level=logging.DEBUG if args.verbose else logging.WARNING) parser = Lark(grammar, start="program") with open(args.infile, "r") as f: logging.debug("Trimming inline whitespace from input") raw = f.read() trim = raw.replace(" ", "").replace("\t", "") logging.debug("Parsing") tree = parser.parse(trim) logging.debug("Parse tree:\n" + tree.pretty()) logging.debug("Computing label positions") # first pass removes labels and keeps track of their locations var_loc = 16 symtab = {} instructions = [] for c in tree.children: if c.data == "label": symbol = c.children[0] if symbol in symtab: logging.error("Symbol {} defined multiple times on line {}".format( symbol, symbol.line)) sys.exit(-1)
T_INTLITERAL : /[0-9]+/i | /0x[a-f0-9]+/i T_DOUBLELITERAL: /(\\d)+\\.(\\d)*/ | /(\\d)*\\.(\\d)+/ | /(\\d)+\.(\\d)*E[+-]?(\\d)+/ T_STRINGLITERAL : /"[^\\n"]*"/ T_BOOLEANLITERAL.2 : "true" | "false" RESERVED.2 : "{" | "}" | "+" | "-" | "*" | "/" | "%" | "<" | "<=" | ">" | ">=" | "==" | "=" | "!=" | "&&" | "||" | "!" | ";" | "," | "." | "[]" | "[" | "]" | "(" | ")" | "void" | "interface" | "double" | "bool" | "string" | "class" | "int" | "null" | "this" | "extend" | "implement" | "for" | "while" | "if" | "else" | "return" | "break" | "new" | "NewArray" | "Print" | "ReadInteger" | "ReadLine" SL_COMMENT: "//" /[^\\n]*/ "\\n" ML_COMMENT: "/*" /(\\*(?!\\/)|[^*])*/ "*/" %ignore SL_COMMENT %ignore ML_COMMENT %import common.WS %ignore WS ''' class CodeGen(Transformer): def t(self, args): token = args[0] if token.type == 'RESERVED': print(token.value) else: print(token.type + ' ' + token.value) l = Lark(g, transformer=CodeGen(), parser='lalr') for line in stdin: l.parse(line)
from lark import Lark

# One or more `bar` items: a regexp terminal followed by the literal "foo".
BAR_FOO_GRAMMAR = '''
start: bar+
bar: /a|b|c*/ "foo"
'''

# One or more repetitions of the literal "bar".
BAR_GRAMMAR = '''
start: "bar"+
'''

l = Lark(BAR_FOO_GRAMMAR)
l.parse('afoobfooccfoo')

l2 = Lark(BAR_GRAMMAR)
l2.parse('barbarbar')
args = parser.parse_args() toylang_grammar = '' text = '' with open('toylang_ll1.lark', 'r') as myfile: toylang_grammar = myfile.read() print('starting') grammar = Lark(toylang_grammar) with open(args.input_path, 'r') as myfile: text = myfile.read() parse_tree = grammar.parse(text) print('parsing done') codegen = CodeGen() module = codegen.module builder = codegen.builder printf = codegen.printf ast_generator = TreeToAst(module, builder, printf, debug=args.debug) ast_generator.transform(parse_tree) for fn in ast_generator.function_definition_list:
except ValueError: maskhashes.append(maskhash) masks.append(mask) maskindex = len(masks) - 1 return maskindex #Restraint data class Restraint(object): def __init__(self): self.maskindices = [] restraints = [] masks = [] maskhashes = [] tree = parser.parse(tbldata) assert tree.data == "assign_statements" #print(tree.pretty()); sys.exit() form = "assign_statement_2" if args.mode == "position": form = "assign_statement_positional" for node in tree.children: if node.data != form: raise Exception("Restraints of the form '{}' are not supported".format(node.data)) if form == "assign_statement_2": sele1, sele2, distance, dminus, dplus = node.children elif form == "assign_statement_positional": sele, distance, dminus, dplus, xyz, vector = node.children else: raise Exception
def get_ast_from_idl_string(idl_string):
    """Parse IDL source text and return the parse tree.

    The parser (start rule ``specification``) is created on the first call
    and kept in the module-level ``_parser`` for later calls.
    """
    global _parser
    parser = _parser
    if parser is None:
        parser = _parser = Lark(grammar, start='specification')
    return parser.parse(idl_string)
class BifParser:
    """Build a ``Gltm`` network from a BIF file parsed with ``bif_grammar``."""

    def __init__(self):
        self.bif_parser = Lark(bif_grammar, start='model', lexer='standard')

    def parse(self, filename):
        """Parse the file at ``filename``, print the network and return it.

        :param filename: path of the BIF file
        :return: the network stored in ``self.net`` (``None`` when the file
            contains no ``network`` instruction)
        """
        # Context manager so the file handle is closed after reading.
        with open(filename) as bif_file:
            tree = self.bif_parser.parse(bif_file.read())
        self.net = None
        for inst in tree.children:
            self.read_tree(inst)
        print(str(self.net))
        return self.net

    def read_tree(self, t):
        """Apply one top-level instruction ``t`` of the parse tree to ``self.net``.

        Handles the ``network``, ``variable``, ``root_prob`` and
        ``nonroot_prob`` instructions; anything else is ignored.
        """
        if t.data == "network":
            name = remove_quote(t.children[0].children[0].value)
            self.net = Gltm(name)

        if t.data == "variable":
            name = remove_quote(t.children[0].children[0].value)
            # Named `var_type` so the builtin `type` is not shadowed.
            var_type = t.children[1].children[0].value
            if var_type == "discrete":
                num_states = int(t.children[2].children[0].value)
                variable = DiscreteVariable(name, num_states)
                self.net.addNode(variable)
            if var_type == "continuous":
                variable = SingularContinuousVariable(name)
                self.net.addNode(variable)

        if t.data == "root_prob":
            varname = remove_quote(t.children[0].children[0].children[0].value)
            node = self.net.getNode(varname)
            variable = node.variable
            table = t.children[1].children
            prob = np.array(read_row(table))
            node.potential.setCells([variable], prob)

        if t.data == "nonroot_prob":
            children = [
                remove_quote(ins.children[0].value)
                for ins in t.children[0].children
            ]
            parent = remove_quote(t.children[1].children[0].children[0].value)
            childnodes = [self.net.getNode(child) for child in children]
            # only one parent
            parentnode = self.net.getNode(parent)

            # One row per remaining child; the first entry of each is skipped.
            prob = np.array([read_row(ins.children[1:]) for ins in t.children[2:]])

            if isinstance(childnodes[0], DiscreteBeliefNode):
                for childnode in childnodes:
                    self.net.addEdge(childnode, parentnode)
                    childnode.potential.setCells(
                        [parentnode.variable, childnode.variable], prob)

            if isinstance(childnodes[0], ContinuousBeliefNode):
                newnode = self.net.combine(True, childnodes)
                self.net.addEdge(newnode, parentnode)
                dim = len(newnode.variable.variables)
                # The first `dim` columns go in as `mus`; the rest of each
                # row is reshaped to (1, dim, dim) and stacked as `covs`.
                mus = prob[:, :dim]
                rest = prob[:, dim:]
                covs = np.concatenate(
                    [row.reshape((1, dim, dim)) for row in rest], axis=0)
                newnode.potential.setEntries(mus, covs)
multibenzo = join('_') def stack(self, values): if values: direction, number = values return str(direction), int(number) def meta(self, values): mapping = {'x': 1, 'y': 1, 'z': 1} for pair in values: if pair is None: continue mapping[pair[0]] = pair[1] order = sorted(mapping) return '_'.join('%s%s' % (k, mapping[k]) for k in order) molecule = join('_') if __name__ == '__main__': groups = pairs for test, t in groups: print test, t try: assert t == ExactName().transform(parser.parse(test)) pass except Exception as e: print e pass print "---"*10
def elementolista(self, *args):
    # Callback receiving the child items of a list element as positional
    # arguments.
    #
    # NOTE(review): the first branch neither returns nor breaks, so with
    # several arguments the loop goes on to index 1: another Symbol appends a
    # second copy of the tail, anything else returns list(args) and discards
    # `parsed`. Confirm whether that is intended.
    parsed = []
    for i, _ in enumerate(args):
        if len(args) > 1 and isinstance(args[i], Symbol):
            # Re-wrap the inner value, then append every following argument.
            parsed.append(Symbol(args[i].value.value))
            parsed.extend(args[i + 1:])
        elif len(args) == 1 and isinstance(args[i], Symbol):
            # A lone symbol is returned on its own, not inside a list.
            return Symbol(args[i].value.value)
        else:
            return list(args)
    # Reached when `args` is empty or every iteration took the first branch.
    return parsed


if __name__ == '__main__':
    # Sample expressions; for each one the pretty-printed tree and then the
    # result of parsing it again are printed.
    exemplos = [
        "(+ 1 2)",
        "(odd? 42)",
        "(let ((x 1) (y 2)) (+ x y))",
        "((diff cos) x)",
        "(max 1 2)",
        "(max (list 1 2 3))",
        # ")a b c(",
        # "(a b",
        # "(a b))"
    ]
    for exemplo in exemplos:
        print('-' * 100)
        print(exemplo)
        tree = grammar.parse(exemplo)
        print('TREE\n', tree.pretty())
        print(grammar.parse(exemplo))
class RqlCompiler(Visitor):
    """Visitor that annotates a parse tree and collects command objects.

    Each rule callback stores its result as an attribute on the tree node
    it visits (``cmd``, ``spec``, ``selection``, ``constraints``, ``value``
    and so on). Callbacks for enclosing rules read those attributes from
    their children, and ``start`` gathers the ``cmd`` of every child tree
    into ``self.commands``.
    """

    def __init__(self, larkfile):
        """Build the parser from the grammar file at ``larkfile``."""
        with open(larkfile) as f:
            self.parser = Lark(f.read(), propagate_positions=True)
        self.commands = []

    def start(self, ast):
        # Only Tree children carry a command; tokens are skipped.
        statements = [c for c in ast.children if isinstance(c, Tree)]
        self.commands = [c.cmd for c in statements]

    def statement(self, ast):
        ast.cmd = ast.children[0].cmd

    def load_statement(self, ast):
        _, toponame, _, varname = ast.children
        ast.cmd = LoadCommand(toponame, varname.value)

    def define_statement(self, ast):
        _, data_type, data_spec, selection = ast.children
        data_spec = data_spec.spec
        if data_type != data_spec.data_type:
            raise Exception('Definition type mismatch at Line %s: %s, %s' %
                            (ast.meta.line, data_type, data_spec))
        selection = selection.selection
        if selection.constraints is not None:
            raise Exception('DEFINE statement MUST NOT have constraints at Line %s' %
                            (ast.meta.line))
        ast.cmd = DefineCommand(data_type, data_spec, selection)

    def property_spec(self, ast):
        varname, vartype, value = ast.children
        ast.spec = DataSpec(varname.value, vartype.value, value.value)

    def cost_spec(self, ast):
        varname, vartype, value, accum_func = ast.children
        ast.spec = DataSpec(varname.value, vartype.value, value.value,
                            accum_func.value)

    def element_selection(self, ast):
        for_each = ast.children[0]
        element_type = for_each.element_type
        toponame = for_each.toponame
        if len(ast.children) > 1:
            that = ast.children[1]
            constraints = that.constraints
            # TODO: assert data_type == constraints.data_type
        else:
            constraints = None
        ast.selection = ElementSelection(toponame, element_type, constraints)

    def for_each_clause(self, ast):
        _, _, element_type, _, toponame = ast.children
        ast.element_type = element_type.value
        ast.toponame = toponame.value

    def that_clause(self, ast):
        _, constraints = ast.children
        ast.constraints = constraints.constraints

    def set_statement(self, ast):
        if len(ast.children) == 4:
            # Form without a value: pass None on to the command. Previously
            # `.value` was read from None here and raised AttributeError.
            _, data_type, varname, selection = ast.children
            value = None
        else:
            _, data_type, varname, value_node, selection = ast.children
            value = value_node.value
        selection = selection.selection
        ast.cmd = SetCommand(data_type, varname.value, value, selection)

    def prop_constraint(self, ast):
        if len(ast.children) == 1:
            ast.constraints = ast.children[0].constraints
        else:
            c1, op, c2 = ast.children
            ast.constraints = CompoundConstraint(c1.constraints, op, c2.constraints)

    def or_prop_constraint(self, ast):
        if len(ast.children) == 1:
            ast.constraints = ast.children[0].constraints
        else:
            c1, op, c2 = ast.children
            ast.constraints = CompoundConstraint(c1.constraints, op, c2.constraints)

    def encap_prop_constraint(self, ast):
        ast.constraints = ast.children[0].constraints

    def not_prop_constraint(self, ast):
        op, c = ast.children
        ast.constraints = CompoundConstraint(c.constraints, op)

    def basic_prop_constraint(self, ast):
        lhs, op, rhs = ast.children
        ast.constraints = BasicConstraint(lhs.value, op.value, rhs.value)

    def operand(self, ast):
        ast.value = ast.children[0].value

    def var_ref(self, ast):
        children = ast.children
        # An optional leading WAYPOINT token is split off from the path.
        if children[0].type == 'WAYPOINT':
            waypoint = ast.children[0].value
            children = children[1:]
        else:
            waypoint = None
        path = [c.value for c in children]
        ast.value = VarRef(waypoint, path)

    def select_statement(self, ast):
        # Children are consumed front to back:
        # [opt_clause] select_clause [where_clause] [as_clause]
        children = ast.children.copy()
        if children[0].data == 'opt_clause':
            opt_obj = children[0].opt_obj
            children = children[1:]
        else:
            opt_obj = None
        reactive = children[0].reactive
        ra_expr = children[0].ra_expr
        toponame = children[0].toponame
        children = children[1:]
        if len(children) > 0 and children[0].data == 'where_clause':
            constraints = children[0].constraints
            children = children[1:]
        else:
            constraints = None
        if len(children) > 0 and children[0].data == 'as_clause':
            varname = children[0].varname
            children = children[1:]
        else:
            varname = None
        ast.cmd = SelectCommand(ra_expr, toponame, varname, reactive,
                                constraints, opt_obj)

    def opt_clause(self, ast):
        _, opt_obj, _ = ast.children
        ast.opt_obj = opt_obj

    def select_clause(self, ast):
        mode, ra_expr, _, toponame = ast.children
        ast.reactive = mode == 'WATCH'
        ast.ra_expr = ra_expr.expr
        ast.toponame = toponame

    def ra_expr(self, ast):
        # Child values alternate: even positions are waypoints, odd ones patterns.
        values = [c.value for c in ast.children]
        waypoints = values[::2]
        patterns = values[1::2]
        ast.expr = RouteAlgebraExpr(waypoints, patterns)

    def where_clause(self, ast):
        _, constraints = ast.children
        ast.constraints = constraints.constraints

    def as_clause(self, ast):
        ast.varname = ast.children[1]

    def drop_statement(self, ast):
        ast.cmd = DropCommand(ast.children[1].value)

    def show_statement(self, ast):
        if len(ast.children) == 2:
            selection = None
        else:
            selection = ast.children[2].selection
        var_ref = ast.children[1].value
        ast.cmd = ShowCommand(var_ref, selection)

    def default(self, ast):
        ast.value = ast.children[0].value

    def value(self, ast):
        # NOTE(review): only prints the node and sets no attribute - confirm
        # this is not leftover debugging output.
        print(ast)

    def number(self, ast):
        value = ast.children[0].value
        # Text containing a "." is a float, anything else an int.
        if '.' in value:
            ast.value = Value('float', float(value))
        else:
            ast.value = Value('int', int(value))

    def string(self, ast):
        value = ast.children[0].value
        ast.value = Value('string', value.strip('\"'))

    def compile(self, program, show_ast=False):
        """Parse ``program``, visit the tree and return the collected commands.

        :param program: source text to compile
        :param show_ast: when true, print the pretty-printed parse tree
        :return: ``self.commands`` as filled in by ``start``
        """
        self.commands = []
        ast = self.parser.parse(program)
        if show_ast:
            print(ast.pretty())
        self.visit(ast)
        return self.commands
from lark import Lark

# NOTE(review): this literal is not a raw string, so Python processes the
# backslash escapes (\n, \r, \t, \f, \\) before Lark sees the grammar, and
# "\w" is an invalid escape sequence; an r''' prefix looks intended - confirm.
# NOTE(review): `start` refers to a rule `stmt` that this grammar does not
# define.
GRAMMAR='''
start: (_NEWLINE | stmt)*
NAME: /[a-zA-Z_]\w*/
COMMENT: /--[^\n]*/
_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+
%ignore /[\t \f]+/
%ignore /\\[\t \f]*\r?\n/
'''

l = Lark(GRAMMAR)
# NOTE(review): parse() is called here without the text to parse.
ast = l.parse()
print(ast)
def compile():
    """Build a JSON parser and return a callable that parses a JSON string.

    The returned callable appends ``';'`` to its argument and parses it with
    an LALR parser whose transformer, ``TreeToJson``, is applied during
    parsing.
    """
    # NOTE: the ; after value is to detect the end of the input
    json_grammar = r"""
        ?start: _WS? value _WS? ";"

        ?value: object
              | array
              | string
              | NUMBER -> number
              | "true" -> true
              | "false" -> false
              | "null" -> null

        array : _BRACK1 [value (_COMMA value)*] _BRACK2
        object : _CURLY1 [pair (_COMMA pair)*] _CURLY2
        pair : string _COLON value

        _COLON: /\s*:\s*/
        _COMMA: /\s*,\s*/
        _CURLY1: /\s*{\s*/
        _CURLY2: /\s*}\s*/
        _BRACK1: /\s*\[\s*/
        _BRACK2: /\s*\]\s*/

        string : STRING
        STRING: "\"" INNER* "\""
        INNER: /[ !#-\[\]-\U0010ffff]*/ | /\\(?:["\/\\bfnrt]|u[0-9A-Fa-f]{4})/

        NUMBER : INTEGER FRACTION? EXPONENT?
        INTEGER: ["-"] ("0" | "1".."9" INT?)
        FRACTION: "." INT
        EXPONENT: ("e"|"E") ["+"|"-"] INT

        _WS: /\s+/

        %import common.INT
    """

    class TreeToJson(Transformer):
        # Callbacks are looked up by rule/alias name, hence the attribute names.

        @v_args(inline=True)
        def string(self, s):
            # `s` is the STRING token; json_unescape receives its raw text.
            return json_unescape(s.value)

        array = list
        pair = tuple
        object = dict

        @v_args(inline=True)
        def number(self, n):
            # Every number, including integers, becomes a float.
            return float(n.value)

        null = lambda self, _: None
        true = lambda self, _: True
        false = lambda self, _: False

    json_parser = Lark(
        json_grammar,
        parser='lalr',
        lexer='basic',
        propagate_positions=False,
        maybe_placeholders=False,
        transformer=TreeToJson(),
        _plugins=lark_cython.plugins,
    )
    # trailing ; is currently necessary to detect end of input
    return lambda s: json_parser.parse(s + ';')
class Parser:
    """Parses one statement at a time and forwards it to the expert system.

    The transformer is handed to Lark, so its callbacks run while parsing
    and call the ``ExpertSystem`` singleton directly.
    """

    def __init__(self):
        grammar = file_to_string('grammars/parser.lark')
        self.__lark_parser = Lark(
            grammar,
            parser='lalr',
            transformer=Parser.MyTransformer(),
        )

    def parse(self, statement):
        """Parse ``statement`` after cutting it at the first ``#``.

        Nothing is parsed when the remaining text is empty.

        :raises ParserException: wrapping any ``LarkError`` from the parser
        """
        code, _, _ = statement.partition('#')  # remove comments
        if not code:
            return
        try:
            self.__lark_parser.parse(code)
        except LarkError as e:
            raise ParserException(e)

    @v_args(inline=True)  # Affects the signatures of the methods
    class MyTransformer(Transformer):
        def __init__(self):
            pass

        # TOP LEVEL OPERATIONS ------------------------------------------------

        def add_rule(self, lhs, rhs):
            system = expert_system.ExpertSystem.instance
            system.add_rule(lhs, rhs)

        def set_facts(self, *args):
            system = expert_system.ExpertSystem.instance
            system.set_facts(args)

        def query_vars(self, *args):
            system = expert_system.ExpertSystem.instance
            system.process_queries(args)

        def show_info(self):
            system = expert_system.ExpertSystem.instance
            system.show_info()

        def del_rule(self, index):
            system = expert_system.ExpertSystem.instance
            system.del_rule(index)

        def verbose_on(self):
            system = expert_system.ExpertSystem.instance
            system.set_verbose(True)

        def verbose_off(self):
            system = expert_system.ExpertSystem.instance
            system.set_verbose(False)

        def visualize(self):
            system = expert_system.ExpertSystem.instance
            system.visualize()

        def reset(self):
            system = expert_system.ExpertSystem.instance
            system.reset()

        def dance(self):
            system = expert_system.ExpertSystem.instance
            system.dance()

        def doge(self):
            system = expert_system.ExpertSystem.instance
            system.doge()

        # LHS ------------------------------------------------
        # Each operator callback rebuilds the textual form of the expression.

        def xor_op(self, a, b):
            return ' ^ '.join((a, b))

        def or_op(self, a, b):
            return ' | '.join((a, b))

        def and_op(self, a, b):
            return ' + '.join((a, b))

        def not_op(self, a):
            return ''.join(('!', a))

        def parentheses(self, a):
            return ''.join(('(', a, ')'))

        # RHS ------------------------------------------------

        def rhs_var(self, var):
            return [var]

        def rhs_and_var(self, rhs, var):
            # Extends the list built so far in place and passes it on.
            rhs.append(var)
            return rhs

        # PARSING TOKENS ------------------------------------------------

        def parse_var(self, token):
            return token.value

        def parse_index(self, token):
            return int(token.value)
class ActionSequenceDataset(Dataset):
    """Dataset yielding the grammar-action sequence of each text file.

    Every regular file in ``texts_dir`` is one sample.  A sample is parsed
    with the Lark grammar at ``lark_path`` and converted into a pair of
    tensors ``(actions, parent_actions)``.

    Args:
        bnf_path: BNF grammar used to build the action getter when no pickled
            one is found at ``action_getter_path``.
        lark_path: Lark grammar file used to parse the text files.
        texts_dir: directory holding the text files.
        action_getter_path: optional pickle path for the action getter; it is
            loaded when it exists and written otherwise.  ``''`` disables it.
        action_sequences_dir: optional directory caching per-file action
            sequences as ``<filename>.pickle``.  ``''`` disables caching.
        start: start rule passed to ``parser.parse`` for each text.
        lang_grammar_start: start rule of the language grammar.

    NOTE: the caches are read with ``pickle.load``, which can execute
    arbitrary code; only point these paths at trusted files.
    """

    def __init__(self, bnf_path, lark_path, texts_dir, action_getter_path='',
                 action_sequences_dir='', start=None,
                 lang_grammar_start='start'):
        super().__init__()
        self.texts_dir = texts_dir
        self.action_sequences_dir = action_sequences_dir
        self.start = start
        self.text_filenames = parse_utils.Enumerator([
            dir_entry.name for dir_entry in os.scandir(texts_dir)
            if dir_entry.is_file()
        ])

        # Get rule dictionary of the language.
        # First check if the action getter already exists; if not, parse the
        # language grammar to create it.
        if action_getter_path and os.path.exists(action_getter_path):
            with open(action_getter_path, 'rb') as f:
                action_getter = pickle.load(f)
        else:
            my_bnf_parser = parse_utils.CustomBNFParser()
            _, rules_dict, symbol_names = my_bnf_parser.parse_file(
                bnf_path, start=lang_grammar_start)
            action_getter = parse_utils.SimpleTreeActionGetter(
                rules_dict, symbol_names)
            if action_getter_path:
                with open(action_getter_path, 'wb') as f:
                    pickle.dump(action_getter, f)
        self.action_getter = action_getter

        with open(lark_path) as f:
            self.parser = Lark(f, keep_all_tokens=True,
                               start=lang_grammar_start)

    def index(self, text_filename):
        """Return the dataset index of ``text_filename``."""
        return self.text_filenames.index(text_filename)

    def __getitem__(self, index):
        """Return ``(actions, parent_actions)`` tensors for sample ``index``."""
        text_filename = self.text_filenames[index]

        # The cache is consulted only when a cache directory is configured.
        # Otherwise the path would resolve against the current working
        # directory and could pick up an unrelated ``<name>.pickle`` file.
        cache_path = None
        if self.action_sequences_dir:
            cache_path = os.path.join(self.action_sequences_dir,
                                      text_filename + '.pickle')

        if cache_path is not None and os.path.exists(cache_path):
            with open(cache_path, 'rb') as f:
                action_sequences = pickle.load(f)
        else:
            text_file_path = os.path.join(self.texts_dir, text_filename)
            with open(text_file_path) as f:
                text = f.read()
            # Get parse tree of the text file written in the language
            # defined by the given grammar.
            text_tree = self.parser.parse(text, start=self.start)
            id_tree = self.action_getter.simple_tree_to_id_tree(
                parse_utils.SimpleTree.from_lark_tree(text_tree))
            # Get sequence of actions taken by each non-terminal symbol in
            # 'prefix DFS left-to-right' order.
            action_sequences = self.action_getter.collect_actions(id_tree)
            if cache_path is not None:
                with open(cache_path, 'wb') as f:
                    pickle.dump(action_sequences, f)

        actions, parent_actions = action_sequences
        return torch.tensor(actions), torch.tensor(parent_actions)

    def __len__(self):
        return len(self.text_filenames)
# Ex.: [], [e], [e,e,e] listas_com_elementos_separados_por_virgulas = r""" start : cochetes+ (elemento*virgula*)* cochetes+ cochetes: "[" | "]" elemento: "e" virgula: "," """ # Elementos e listas que podem conter outras listas. # Ex.: e, [], [[]], [e], [e,[e,[]],e] listas_aninhadas = r""" start : conj* conjcochetes* (cochetes+conj*)* conjcochetes: cochetes cochetes cochetes: "[" | "]" conj: (elemento+virgula*) elemento: "e" | "" | virgula: "," """ # Expressões matemáticas no estilo prefixo. # Ex.: 42, + 1 2, + * 10 2 2 operadores_prefixos = r""" start : "..." """ grammar = Lark(letras_dentro_de_parenteses) code = "(LL)" result = grammar.parse(code) print(result.pretty())
'aa & not bb',
'aa & bb > 23.54 | cc & dd',
'aa and bb > 22 and cc > 33 and dd > 44 ',
'((aa and bb > 22) and cc > 33) and dd > 44 ',
'(aa and bb > 22) and (cc > 33 and dd > 44) ',
'(aa and bb > 22 and cc > 33 and dd > 44) ',
'aa and bb > 23.54 or 22 in cc and dd',
'aa & bb > 23.54 | (22 in cc & dd)',
'aa and bb > 23.54 or (22 in cc and dd)',
'aa and not (bb > 23.54 or (22 in cc and dd))',
'expression = (bb/3-1)*cc',
'energy/n_atoms > 3',
'1=3',
'all(aa) > 3',
'any(aa) > 3',
'aa = False',
'aa = [True, True, True]',
)

# Driver loop: echo each sample query, parse it, and print the transformed
# result.  `parser` and `transformer` are created outside this view --
# presumably a Lark parser and a matching Transformer; confirm.
for query in queries:
    print(query)
    try:
        tree = parser.parse(query)
        # print(tree)
        # print(tree.pretty())
        print(transformer.transform(tree))
    except LarkError:
        # A LarkError from parsing or transforming is replaced by a bare
        # NotImplementedError, which also stops the loop at the first
        # failing query.
        raise NotImplementedError
class FolGrammar:
    """Lark grammar and helpers for a small first-order-logic language.

    The vocabulary (names, variables, unary and binary predicates) is fixed by
    the class-level lists and substituted into ``LARK_FOL_GRAMMAR_SCHEMA``
    when an instance is created.
    """

    FOL_NAMES = ['a', 'b', 'c', 'd', 'e', 'j', 'm', 'r']
    FOL_VARIABLES = ['x', 'y', 'w', 'z']
    FOL_UNARY = ['I', 'P', 'Q', 'R', 'S', 'T', 'U']
    FOL_BINARY = ['A', 'B', 'C', 'D', 'E', 'F']

    # The four ``{}`` placeholders are filled, in order, with the unary
    # predicates, binary predicates, variables and names.
    LARK_FOL_GRAMMAR_SCHEMA = '''
        start: formula

        formula: UNARY (VARIABLE|NAME) -> unary
            | BINARY (VARIABLE|NAME) (VARIABLE|NAME) -> binary
            | "(" formula "&" formula ")" -> and
            | "(" formula "^" formula ")" -> or
            | "-" formula -> neg
            | "$" VARIABLE "(" formula ")" -> q_ex
            | "@" VARIABLE "(" formula ")" -> q_un

        UNARY: ({})
        BINARY: ({})
        VARIABLE: ({})
        NAME: ({})
        WHITESPACE: (" " | "\\n")+
        %ignore WHITESPACE
    '''

    def __init__(self):
        """
        Fill the general FOL schema with the actual names, variables,
        predicates available
        """
        names = '|'.join('"{}"'.format(n) for n in self.FOL_NAMES)
        variables = '|'.join('"{}"'.format(v) for v in self.FOL_VARIABLES)
        unary = '|'.join('"{}"'.format(u) for u in self.FOL_UNARY)
        binary = '|'.join('"{}"'.format(b) for b in self.FOL_BINARY)
        self.LARK_FOL_GRAMMAR = self.LARK_FOL_GRAMMAR_SCHEMA.format(
            unary, binary, variables, names)
        # load parser
        self.fol_parser = Lark(self.LARK_FOL_GRAMMAR)

    def parse_expression_with_grammar(self, expression):
        """
        Parse a given expression (a string) to a parse tree according to the
        grammar, returning None if it fails to do so

        :return: Lark parse tree or None if expression cannot be parsed
        """
        try:
            return self.fol_parser.parse(expression)
        except Exception as ex:
            # Deliberate best effort: report the problem and signal failure
            # with None instead of propagating.
            print(ex)
            print('Expression "{}" cannot be parsed'.format(expression))
            return None

    def get_free_variables_from_formula_recursively(self, formula,
                                                    free_variables,
                                                    bound_variables):
        """
        Recursively traverse the formula tree and retrieve variables which
        are free, i.e. variables not in the scope of a quantifier.

        A quantifier binds its variable only inside its own sub-formula, so
        in ``($x(P x) & Q x)`` the second ``x`` is reported as free.

        :param formula: parsed tree from Lark (possibly sub-tree)
        :param free_variables: list of free variables found so far; it is
            extended in place and returned
        :param bound_variables: variables bound by the enclosing quantifiers;
            this list is not modified
        :return: list of all variables (unique values only) that are free in
            the formula
        """
        if formula.data in ('q_ex', 'q_un'):
            # First child is the bound variable, second the quantified body.
            # The binding is added to a copy so it does not leak into
            # sibling sub-formulas once this scope has been processed.
            variable, scope = formula.children[0], formula.children[1]
            self.get_free_variables_from_formula_recursively(
                scope, free_variables, list(bound_variables) + [variable])
        elif formula.data in ('unary', 'binary'):
            # Atomic formula: children[0] is the predicate, the rest are its
            # arguments (variables or names).
            for arg in formula.children[1:]:
                if (self.is_variable(arg)
                        and arg not in bound_variables
                        and arg not in free_variables):
                    free_variables.append(str(arg))
        else:
            # Connectives and the start rule: examine every child.
            for child in formula.children:
                self.get_free_variables_from_formula_recursively(
                    child, free_variables, bound_variables)

        return free_variables

    # Some utility functions below

    def get_lark_grammar(self):
        """Return the grammar text with the vocabulary filled in."""
        return self.LARK_FOL_GRAMMAR

    def is_variable(self, x):
        """Return True if ``x`` is one of ``FOL_VARIABLES``."""
        return x in self.FOL_VARIABLES

    def is_name(self, x):
        """Return True if ``x`` is one of ``FOL_NAMES``."""
        return x in self.FOL_NAMES
def loadBSL(program):
    """Parse a Burnlight Scheduling Language string into a Program.

    The text is parsed with a Lark parser built from ``schedule_grammar`` and
    the resulting tree is converted by ``ProgramTransformer``.
    """
    bsl_parser = Lark(schedule_grammar)
    to_program = ProgramTransformer()
    parse_tree = bsl_parser.parse(program)
    return to_program.transform(parse_tree)