def parse(self, string):
    """Parse a regex string into the parser's output queue.

    The string is first expanded with explicit sequence operators, then
    every resulting token is routed either to ``pushOperator`` or to
    ``pushCharacter``.  Once the input is consumed, whatever is left on
    the operator stack is moved to the output queue.

    Returns ``self._outputQueue`` itself (not a copy).

    Raises ``Exception`` carrying
    ``RegexParser._unbalanced_parenth_except`` when a bracket operator is
    still on the operator stack after all tokens have been consumed.
    """
    for token in self.padWithSequenceOperators(string):
        if isinstance(token, regOp.Operator):
            # Already an operator object (e.g. an inserted sequence operator).
            operator = token
        elif regOp.isOperator(token):
            # Raw operator symbol: resolve it to its operator object.
            operator = regOp.getOperator(token)
        else:
            # Anything else is treated as a literal character.
            self.pushCharacter(token)
            continue
        self.pushOperator(operator)

    # Drain the remaining operators; a leftover bracket was never closed.
    while self._operatorStack:
        pending = self._operatorStack.pop()
        if isinstance(pending, regOp.BracketOperator):
            raise Exception(RegexParser._unbalanced_parenth_except)
        self._outputQueue.append(pending)

    return self._outputQueue
def testParser(self):
    """Exercise ``RegexParser.parse`` on malformed and well-formed patterns.

    Patterns with unbalanced parentheses must raise; every other pattern
    must yield the expected list of characters and operator objects.
    A fresh parser is built for every pattern.
    """
    # Unbalanced parentheses: stray ')', missing ')', ')' in the middle.
    for unbalanced in ("(a|b)+)", "((a|b)+", "(a|)b)+"):
        self.assertRaises(Exception, p.RegexParser().parse, unbalanced)

    def postfix(pattern):
        return p.RegexParser().parse(pattern)

    # Trivial inputs.
    self.assertListEqual(postfix(""), [])
    self.assertListEqual(postfix("a"), ['a'])

    op = o.getOperator
    seq = o.getSquenceOperator

    # Implicit sequencing and alternation.
    self.assertListEqual(postfix("ab"), ['a', 'b', seq()])
    self.assertListEqual(postfix("a|b"), ['a', 'b', op('|')])

    # Grouping combined with a repetition operator.
    self.assertListEqual(
        postfix("(a|b)+"),
        ['a', 'b', op('|'), op('+')])
    self.assertListEqual(
        postfix("((a)|b)+"),
        ['a', 'b', op('|'), op('+')])

    # Groups sequenced with neighbouring characters.
    self.assertListEqual(
        postfix("(a|b)+c"),
        ['a', 'b', op('|'), op('+'), 'c', seq()])
    self.assertListEqual(
        postfix("(a|b)+c*"),
        ['a', 'b', op('|'), op('+'), 'c', op('*'), seq()])
    self.assertListEqual(
        postfix("c(a|b)+"),
        ['c', 'a', 'b', op('|'), op('+'), seq()])

    # Redundant brackets and back-to-back repetitions.
    self.assertListEqual(postfix("((a))"), ['a'])
    self.assertListEqual(
        postfix("a+b+"),
        ['a', op('+'), 'b', op('+'), seq()])
def padWithSequenceOperators(self, string):
    """Return the symbols of ``string`` with explicit sequence operators.

    The result is a list holding every symbol of ``string`` in order, with
    ``regOp.getSquenceOperator()`` inserted between two neighbouring
    symbols whenever the left one can end an operand and the right one can
    start an operand:

    * a symbol ends an operand when it is not an operator, is a closing
      bracket, or is an operator that ``regOp`` reports as a
      left-associative unary operator (presumably postfix operators such
      as ``+`` or ``*`` -- confirm against ``regOp``);
    * a symbol starts an operand when it is not an operator or is an
      opening bracket.

    The same rule is applied to plain characters and to opening brackets.
    Previously an opening bracket received a sequence operator after any
    symbol other than another opening bracket, so a pattern like
    ``a|(b)`` got a spurious sequence operator right after ``|``.

    An empty ``string`` yields an empty list.
    """
    def endsOperand(symbol):
        # Plain character, closing bracket or left-associative unary operator.
        if not regOp.isOperator(symbol):
            return True
        return (regOp.isClosingBracket(symbol)
                or regOp.isLeftAssociativeUnaryOperator(
                    regOp.getOperator(symbol)))

    def startsOperand(symbol):
        # Plain character or opening bracket.
        return not regOp.isOperator(symbol) or regOp.isOpeningBracket(symbol)

    paddedString = []
    pastChar = None  # None marks "no previous symbol yet"
    for char in string:
        if (pastChar is not None
                and startsOperand(char)
                and endsOperand(pastChar)):
            paddedString.append(regOp.getSquenceOperator())
        paddedString.append(char)
        pastChar = char
    return paddedString