def get_interpretation(self, words):
    """
    Match `words` against this rule's regex and, on success, interpret it.

    Returns an `(expression, userdata)` pair. The pair is `(None, None)`
    when the regex does not match the whole word list or when
    `self.interpret` raises `BadSemantic`.
    """
    rulename = self.__class__.__name__
    logger.debug("Trying to match with regex: {}".format(rulename))

    # The trailing _EOL literal forces the regex to consume every word.
    anchored_regex = self.regex + Literal(_EOL)
    found = refo.match(anchored_regex, words + [_EOL])
    if not found:
        logger.debug("No match")
        return None, None

    try:
        outcome = self.interpret(Match(found, words))
    except BadSemantic as error:
        logger.debug(str(error))
        return None, None

    # `interpret` may hand back either `(expression, userdata)` or a value
    # that cannot be unpacked at all; the latter is taken as the expression.
    try:
        expression, userdata = outcome
    except TypeError:
        expression = outcome
        userdata = None

    # Record on every element of the expression which rule produced it.
    for item in expression:
        item.rule_used = rulename
    return expression, userdata
"... sort of.") parser.add_argument("filename", action="store") cfg = parser.parse_args() text = open(cfg.filename).read() from refo import finditer, Predicate, Literal, Any, Group, Star def notin(xs): return lambda x: x not in xs name = Predicate(notin("/")) + Star(Predicate(notin(" >"))) name = Group(name, "name") inside = name + Star(Any(), greedy=False) opentag = Literal("<") + inside + Literal(">") opentag = Group(opentag, "open") closetag = Literal("<") + Literal("/") + inside + Literal(">") closetag = Group(closetag, "close") regex = closetag | opentag depth = 0 for m in finditer(regex, text): if "open" in m: i, j = m["name"] print(" " * depth + text[i:j]) depth += 1 else: assert "close" in m depth -= 1
#!/usr/bin/python
import re
import time

from refo import Literal, Question, match

# This regular expression is known to kill the python re module
# because it exploits the fact that the implementation has exponential
# worst case complexity.
# Instead, this implementation has polynomial worst case complexity,
# and therefore this test should finish in a reasonable time.

# You might want to try with N = 20, 30, 40, 100 to see what happens
N = 25

# Refo side: N optional "a"s followed by N mandatory "a"s, matched
# against a string of exactly N "a"s.
a = Literal("a")
string = "a" * N
regex = Question(a) * N + a * N
start = time.time()
m = match(regex, string)
end = time.time()
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("Refo finished in {0:.2} seconds".format(end - start))

# Python re side: the same pattern, (?:a?){N}a{N}. The group is
# non-capturing "(?:...)" so that it mirrors the refo expression above;
# the doubled braces are literal braces for str.format.
regex = "(?:a?){{{0}}}a{{{0}}}".format(N)
regex = re.compile(regex)
start = time.time()
regex.match(string)
end = time.time()
print("Python re finished in {0:.2} seconds".format(end - start))