def lex_analysis(input_string, user_defined_lexer_rule):
    """Tokenize *input_string* with the user-supplied lexer rules.

    Each rule is a ``(token_object, token_regex, token_action)`` triple;
    the regex is compiled to an automaton via ``MicroRegEx.compile``.

    Returns a list of ``(matched_str, token_object)`` pairs covering the
    whole input.  Raises ``ValueError`` when no rule matches the
    remaining text.
    """
    nfa_by_token = {}
    action_by_token = {}
    for token_object, token_regex, token_action in user_defined_lexer_rule:
        nfa_by_token[token_object] = MicroRegEx.compile(token_regex)
        action_by_token[token_object] = token_action

    result = []
    remaining = input_string
    # Consume the input front-to-back, one token per iteration.
    while remaining:
        matched_str, token_object = match_token(
            remaining, nfa_by_token, action_by_token
        )
        if not matched_str:
            raise ValueError("lexer parse failed")
        result.append((matched_str, token_object))
        remaining = remaining[len(matched_str):]
    return result
def base(self, file):
    """Run table-driven regex assertions read from *file*.

    Each non-comment line contains: a pattern, a string that must match,
    and optionally a string that must NOT match, separated by whitespace.
    Lines starting with ``#`` are skipped.

    Fix: blank or malformed lines (field count other than 2 or 3)
    previously left ``pattern`` as ``None`` and crashed in
    ``MicroRegEx.compile(None)``; they are now skipped explicitly.
    """
    with open(file) as f:
        self.text = f.readlines()
    for line in self.text:
        if line.startswith('#'):  # comment line
            continue
        line_list = line.split()
        f_str = None
        if len(line_list) == 2:
            pattern, t_str = line_list
        elif len(line_list) == 3:
            pattern, t_str, f_str = line_list
        else:
            # Blank or malformed line: nothing to test here.
            continue
        # NOTE(review): compiled with use_dfa=True, so "nfa" is presumably
        # a DFA-backed automaton — name kept for continuity.
        nfa = MicroRegEx.compile(pattern, use_dfa=True).simplify()
        self.assertEqual(nfa.match(t_str), True)
        if f_str:
            self.assertEqual(nfa.match(f_str), False)
        print(line, "pass")
import MicroRegEx
from MicroRegEx.Automaton.NFA2DFA import NFA2DFA

# Compile the pattern into an NFA, convert it to a DFA with the
# subset-construction helper, and render the resulting automaton.
automaton = MicroRegEx.compile("(a|b)c?")
deterministic = NFA2DFA(automaton).convert()
deterministic.plot()
import MicroRegEx

# Compile the pattern and render the compiled automaton directly.
MicroRegEx.compile("(a|b)c?").plot()