class RegexMatcher(object): """ A basic regex parser for ascii characters -- initialize with a pattern passed to either the class constructor or set_pattern Use match* functions to find matches in input strings """ def __init__(self, pattern): self.setPattern(pattern) # pass # generates the state machine for the regex specified by pattern def setPattern(self, pattern): self.pattern = pattern self.stateMachine = NFA(pattern) def findLongestMatch(self, inStr, i): self.stateMachine.reset() j = i prevFinishedIndex = -1 while j < len(inStr) and self.stateMachine.advanceStates(inStr[j]): j += 1 if self.stateMachine.finished(): prevFinishedIndex = j if prevFinishedIndex != -1: return inStr[i:prevFinishedIndex] else: return "" # returns a tuple of (int, str) that contains the integer of the index # of the first char matching the pattern in input string and the str that # matches the pattern # if no match if found, returns a () empty tuple def matchFirst(self, inStr, searchStart=0): self.stateMachine.reset() for i in range(searchStart, len(inStr)): match = self.findLongestMatch(inStr, i) if match: return (i, match) return () # same as matchFirst, except returns a list of all (int, str) # pairs that match the pattern sent to set_pattern # if no matches returns [] def matchAll(self, inStr): result = [] prevMatch = self.matchFirst(inStr, 0) while prevMatch: result.append(prevMatch) startIndex, match = prevMatch iOfLastMatch = startIndex + len(match) prevMatch = self.matchFirst(inStr, iOfLastMatch) return result def __str__(self): return self.pattern + "\n" + str(self.stateMachine)