def testRegularExpressionExtract(self):
    """Check extract() with a digits-only RegularExpression on 'abc1234abc'.

    The expected result is one PositionToken per non-empty contiguous slice
    of the digit run '1234' (input indexes 3..7), ordered by left boundary
    and then by right boundary. For string input the token content is a
    substring; for list input it is a list of single characters.
    """
    self.maxDiff = None  # do not truncate the diff of the long expected lists
    gd = RegularExpression('^[0123456789]*$')

    digits = '1234'
    offset = 3  # position of the first digit inside 'abc1234abc'
    # (start, stop) pairs relative to `digits`:
    # (0,1) (0,2) (0,3) (0,4) (1,2) (1,3) (1,4) (2,3) (2,4) (3,4)
    spans = [(start, stop)
             for start in range(len(digits))
             for stop in range(start + 1, len(digits) + 1)]

    # Plain string input.
    expected_result = [
        PositionToken(content=digits[start:stop], gd=gd,
                      left=start + offset, right=stop + offset)
        for start, stop in spans]
    self.assertListEqual(extract(gd, 'abc1234abc'), expected_result)

    # Sequence input, first as Token objects and then as bare characters.
    expected_result = [
        PositionToken(content=list(digits[start:stop]), gd=gd,
                      left=start + offset, right=stop + offset)
        for start, stop in spans]
    token_input = [Token(x, gd) for x in 'abc1234abc']
    self.assertListEqual(extract(gd, token_input), expected_result)
    char_input = list('abc1234abc')
    self.assertListEqual(extract(gd, char_input), expected_result)

    # NOTE(review): extract is invoked here with None as its only argument;
    # if extract requires a second positional argument, the resulting
    # TypeError alone satisfies this assertion -- confirm that is intended.
    self.assertRaises(Exception, extract, None)
    self.assertListEqual(extract(gd, ''), [])  # Empty input
def testEncoding(self):
    """Check extract() with the ascii_encoding definition.

    Covers empty input, a string and a list that both start with 'a'
    followed by '£' (only the 'a' at positions 0..1 is expected back),
    and a call to extract with None as its single argument.
    """
    encoding = ascii_encoding

    # Empty input produces no tokens.
    self.assertListEqual(extract(encoding, ''), [])

    # String input: content is the substring 'a'.
    expected_from_string = [PositionToken('a', encoding, 0, 1)]
    self.assertListEqual(extract(encoding, 'a£'), expected_from_string)

    # List input: content is the list ['a'].
    expected_from_list = [PositionToken(['a'], encoding, 0, 1)]
    self.assertListEqual(extract(encoding, ['a', '£']), expected_from_list)

    self.assertRaises(Exception, extract, None)
def my_call_back(graph, element):
    """Compute and cache the 'parsed' result for ``element`` in ``graph``.

    Successors that have no 'parsed' entry yet are processed first through a
    recursive call. Their tokens are gathered, passed through
    ``remove_subsets`` and checked for contiguity before being handed to
    ``extract``; the outcome is stored in ``graph.node[element]['parsed']``.

    Args:
        graph: object exposing a ``node`` mapping and a ``successors``
            method (looks like a NetworkX-style directed graph -- confirm).
        element: node key; it is also passed to ``extract`` as first argument.

    Returns:
        None. The result is written into the node's attribute mapping.

    Raises:
        Exception: if a token's ``left`` differs from the previous token's
            ``right`` (the first token must start at 0).
    """
    node_data = graph.node[element]
    if 'parsed' in node_data:
        return  # Already parsed

    collected = []
    for child in graph.successors(element):
        if not (child in graph.node and 'parsed' in graph.node[child]):
            # Successor has no result yet: parse it on demand.
            my_call_back(graph, child)
        collected.extend(graph.node[child]['parsed'])

    # The tokens must tile the input without gaps, starting at position 0.
    contiguous = []
    expected_left = 0
    for token in remove_subsets(collected):
        if expected_left != token.left:
            raise Exception("Non contiguous parsing from sucessors")
        expected_left = token.right
        contiguous.append(token)

    # Imported at the point of use, exactly where the original imported it.
    from pydsl.extract import extract
    node_data['parsed'] = extract(element, contiguous)
def my_call_back(graph, element):
    """Compute and cache the 'parsed' result for ``element`` in ``graph``.

    Every successor must already carry a 'parsed' entry. Those entries are
    rebuilt as PositionToken objects, ordered by their left boundary, passed
    through ``remove_subsets`` and checked for contiguity. The surviving
    items are turned into Token objects and handed to ``extract``; the
    outcome is stored in ``graph.node[element]['parsed']``.

    Args:
        graph: object exposing a ``node`` mapping and a ``successors``
            method (looks like a NetworkX-style directed graph -- confirm).
        element: node key; it is also passed to ``extract`` as first argument.

    Returns:
        None. The result is written into the node's attribute mapping.

    Raises:
        Exception: if a successor is missing from ``graph.node`` or has no
            'parsed' entry, or if an item's left boundary differs from the
            previous item's right boundary (the first must start at 0).
    """
    node_data = graph.node[element]
    if 'parsed' in node_data:
        return  # Already parsed

    positioned = []
    for child in graph.successors(element):
        if not (child in graph.node and 'parsed' in graph.node[child]):
            raise Exception("Uninitialized graph %s" % child)
        positioned.extend(
            PositionToken(string, gd, left, right)
            for string, gd, left, right in graph.node[child]['parsed'])

    # Order the elements coming from all successors by left boundary.
    positioned.sort(key=lambda item: item.left)

    # The items must tile the input without gaps, starting at position 0.
    lexed = []
    expected_left = 0
    for string, gd, left, right in remove_subsets(positioned):
        if expected_left != left:
            raise Exception("Non contiguous parsing from sucessors")
        expected_left = right
        lexed.append(Token(string, gd))

    # Imported at the point of use, exactly where the original imported it.
    from pydsl.extract import extract
    node_data['parsed'] = extract(element, lexed)
def testEncoding(self):
    """Check extract() with the ascii_encoding definition.

    Empty input must give an empty list. For both a string and a list made
    of 'a' followed by '£', the only expected token covers the 'a' at
    positions 0..1. Finally, extract called with None as its single
    argument must raise.
    """
    ad = ascii_encoding
    self.assertListEqual(extract(ad, ''), [])

    # (input data, expected token content) -- string first, then list form.
    cases = (
        ('a£', 'a'),
        (['a', '£'], ['a']),
    )
    for data, content in cases:
        self.assertListEqual(extract(ad, data),
                             [PositionToken(content, ad, 0, 1)])

    self.assertRaises(Exception, extract, None)