def test_03_00(self):
    """TestMultiPassStringMarker.test_03_00(): C++ comment using removeSetReplaceClear().

    Feeds a one-line ``//`` comment followed by an empty line through a
    single marking pass and checks both the resulting character stream and
    the typed words.
    """
    mps = MultiPassString.MultiPassString(io.StringIO(u'// C\n\n'))
    # Marking pass. The branch order matters: the end-of-comment test must
    # be tried before the start-of-comment test.
    for ch in mps.genChars():
        if ch == '\n' and mps.hasWord:
            # End of the marked comment: the assertions below expect it to
            # be recorded as 'C++ comment' and replaced by a single space.
            mps.removeSetReplaceClear(True, 'C++ comment', ' ')
        elif ch == '/' and mps.prevChar != '/':
            # First '/' of a potential comment.
            mps.setMarker()
        elif mps.wordLength == 1 and ch != '/':
            # A lone '/' that was not followed by another '/': not a comment.
            mps.clearMarker()
    # The comment (including its newline) has become one space.
    self.assertEqual(' \n', ''.join(mps.genChars()))
    # The original text is still available, word by word, with its type.
    expectedWords = [
        ('// C\n', 'C++ comment'),
        ('\n', 'Unknown'),
    ]
    self.assertEqual(expectedWords, list(mps.genWords()))
def test_01(self):
    """TestMultiPassStringMarker.test_01(): Empty string.

    An empty input must produce no characters and no words, and must leave
    the current string, character index and index/type map empty.
    """
    emptyMps = MultiPassString.MultiPassString(io.StringIO(u''))
    # Nothing to generate, either as characters or as words.
    self.assertEqual([], list(emptyMps.genChars()))
    self.assertEqual([], list(emptyMps.genWords()))
    # Internal state as exposed by the public attributes.
    self.assertEqual([], emptyMps.currentString)
    self.assertEqual(0, emptyMps.idxChar)
    self.assertEqual({}, emptyMps.idxTypeMap)
def __init__(self, theFileObj=None, theFileId=None, theDiagnostic=None):
    """Constructor.

    Passes all three arguments to the base class constructor and then
    wraps ``theFileObj`` in a ``MultiPassString.MultiPassString`` that is
    kept as ``self._mps``.

    :param theFileObj: File object.
    :type theFileObj: ``_io.TextIOWrapper``

    :param theFileId: File ID such as the path.
    :type theFileId: ``str``

    :param theDiagnostic: A diagnostic for processing messages.
        NOTE(review): the recorded type below is only that of the default
        value; the concrete diagnostic class is not visible here.
    :type theDiagnostic: ``NoneType``

    :returns: ``NoneType``
    """
    # Base class initialisation comes first; it receives the arguments unchanged.
    super(ItuToTokens, self).__init__(theFileObj, theFileId, theDiagnostic)
    # NOTE(review): theFileObj defaults to None and is handed straight to
    # MultiPassString; whether None is acceptable there is not visible here.
    self._mps = MultiPassString.MultiPassString(theFileObj)
def test_05(self):
    """TestMultiPassStringMarker.test_05(): Spaces and numbers, single pass.

    Walks the input once, typing each run of whitespace or digits as soon
    as the character class changes, then checks the resulting typed words.
    """
    myStr = u' 1 12 123 1234'
    myMps = MultiPassString.MultiPassString(io.StringIO(myStr))
    # Type of the run currently being accumulated.
    cType = MultiPassString.MultiPassString.UNKNOWN_TOKEN_TYPE
    myMps.setMarker()
    for c in myMps.genChars():
        if c.isspace():
            newType = 'whitespace'
        elif c.isdigit():
            newType = 'digit'
        else:
            # Neither whitespace nor digit: leave the current run alone.
            continue
        if cType != newType and myMps.hasWord:
            # Character class changed: close the previous run (the current
            # character is not part of it, hence isTerm=False) and start a
            # new run here.
            myMps.setWordType(cType, isTerm=False)
            myMps.setMarker()
        cType = newType
    # Trailing token: the last run has no following character to close it.
    if myMps.hasWord:
        myMps.setWordType(cType, isTerm=False)
    self.assertEqual(
        [
            (' ', 'whitespace'),
            ('1', 'digit'),
            (' ', 'whitespace'),
            ('12', 'digit'),
            (' ', 'whitespace'),
            ('123', 'digit'),
            (' ', 'whitespace'),
            ('1234', 'digit'),
        ],
        list(myMps.genWords()),
    )
def test_03(self):
    """TestMultiPassStringMarker.test_03(): C++ and line continuation using removeSetReplaceClear().

    Pass 1 removes the backslash-newline line continuations. Pass 2 then
    treats the joined ``//`` line as one comment and replaces it with a
    single space via ``removeSetReplaceClear()``.
    """
    # Written with explicit escapes so that the line breaks, which the
    # assertions below depend on, cannot be lost: an empty first line, a
    # comment spread over three physical lines by two line continuations,
    # and an empty last line.
    myStr = (
        u'\n'
        u'// Some comment \\\n'
        u'and \\\n'
        u'the rest\n'
        u'\n'
    )
    myMps = MultiPassString.MultiPassString(io.StringIO(myStr))
    # Pass 1: remove line continuations.
    for c in myMps.genChars():
        if c == '\n' and myMps.prevChar == '\\':
            myMps.removeMarkedWord(isTerm=True)
            # Note we don't myMps.setWordType('line-continuation')
            # as this is interlaced with the C++ comment.
        # Re-mark on every character so that, at a newline, the marked word
        # starts at the preceding character (the backslash).
        myMps.setMarker()
    # Check the result of this pass.
    self.assertEqual(
        '\n// Some comment and the rest\n\n',
        ''.join(myMps.genChars()),
    )
    # Pass 2: mark the comment.
    for c in myMps.genChars():
        if c == '\n' and myMps.hasWord:
            # End of comment: remove it, record its type and replace it by
            # a single space.
            myMps.removeSetReplaceClear(True, 'C++ comment', ' ')
        elif c == '/' and myMps.prevChar != '/':
            # Start of comment.
            myMps.setMarker()
        elif myMps.wordLength == 1 and c != '/':
            # A single '/' not followed by another one is not a comment.
            myMps.clearMarker()
    # Check the result of this pass.
    self.assertEqual('\n \n', ''.join(myMps.genChars()))
    self.assertEqual(
        [
            ('\n', 'Unknown'),
            ('// Some comment \\\nand \\\nthe rest\n', 'C++ comment'),
            # Note: the final ('\n', 'Unknown') is not reported by genWords().
        ],
        list(myMps.genWords()),
    )
def __init__(self, theFileObj=None, theFileId=None, theDiagnostic=None):
    """Constructor.

    Passes all three arguments to the base class constructor and then
    wraps ``theFileObj`` in a ``MultiPassString.MultiPassString`` that is
    kept as ``self._mps``.

    :param theFileObj: The object handed to both the base class and
        ``MultiPassString``. NOTE(review): presumably a text file-like
        object; confirm against callers.

    :param theFileId: Forwarded unchanged to the base class.
        NOTE(review): presumably an identifier such as the file path.

    :param theDiagnostic: Forwarded unchanged to the base class.

    :returns: ``NoneType``
    """
    # Base class initialisation comes first; it receives the arguments unchanged.
    super(ItuToTokens, self).__init__(theFileObj, theFileId, theDiagnostic)
    # NOTE(review): theFileObj defaults to None and is handed straight to
    # MultiPassString; whether None is acceptable there is not visible here.
    self._mps = MultiPassString.MultiPassString(theFileObj)
def test_02(self):
    """TestMultiPassStringMarker.test_02(): C++ and line continuation using individual functions.

    Pass 1 removes the backslash-newline line continuations. Pass 2 then
    treats the joined ``//`` line as one comment and replaces it with a
    single space using the separate calls ``removeMarkedWord()``,
    ``setWordType()``, ``setAtMarker()`` and ``clearMarker()``.
    """
    # Written with explicit escapes so that the line breaks, which the
    # assertions below depend on, cannot be lost: an empty first line, a
    # comment spread over three physical lines by two line continuations,
    # and an empty last line.
    myStr = (
        u'\n'
        u'// Some comment \\\n'
        u'and \\\n'
        u'the rest\n'
        u'\n'
    )
    myMps = MultiPassString.MultiPassString(io.StringIO(myStr))
    # Pass 1: remove line continuations.
    for c in myMps.genChars():
        if c == '\n' and myMps.prevChar == '\\':
            myMps.removeMarkedWord(isTerm=True)
            # Note we don't myMps.setWordType('line-continuation')
            # as this is interlaced with the C++ comment.
        # Re-mark on every character so that, at a newline, the marked word
        # starts at the preceding character (the backslash).
        myMps.setMarker()
    # Check the result of this pass.
    self.assertEqual(
        '\n// Some comment and the rest\n\n',
        ''.join(myMps.genChars()),
    )
    # Pass 2: mark the comment.
    for c in myMps.genChars():
        if c == '\n' and myMps.hasWord:
            # End of comment: remove it, record its type, put a single
            # space in its place and drop the marker.
            myMps.removeMarkedWord(isTerm=True)
            myMps.setWordType('C++ comment', isTerm=True)
            myMps.setAtMarker(' ')
            myMps.clearMarker()
        elif c == '/' and myMps.prevChar != '/':
            # Start of comment.
            myMps.setMarker()
        elif myMps.wordLength == 1 and c != '/':
            # A single '/' not followed by another one is not a comment.
            myMps.clearMarker()
    # Check the result of this pass.
    self.assertEqual('\n \n', ''.join(myMps.genChars()))
    self.assertEqual(
        [
            ('\n', 'Unknown'),
            ('// Some comment \\\nand \\\nthe rest\n', 'C++ comment'),
            # Note: the final ('\n', 'Unknown') is not reported by genWords().
        ],
        list(myMps.genWords()),
    )
def test_04(self):
    """TestMultiPassStringMarker.test_04(): Spaces and numbers, multiple passes.

    The first pass types the runs of whitespace, the second pass types the
    runs of digits. The typed words are checked after each pass.
    """
    myStr = u' 1 12 123 1234'
    myMps = MultiPassString.MultiPassString(io.StringIO(myStr))

    def markRuns(isMember, wordType):
        """Type every run of characters accepted by isMember as wordType
        in one pass over myMps and return the resulting list of words."""
        myMps.clearMarker()
        for c in myMps.genChars():
            if isMember(c):
                # Start a run only if one is not already in progress.
                if not myMps.hasWord:
                    myMps.setMarker()
            elif myMps.hasWord:
                # First character after a run: close the run (the current
                # character is not part of it, hence isTerm=False).
                myMps.setWordType(wordType, isTerm=False)
                myMps.clearMarker()
        # A run that reaches the end of the input has nothing to close it.
        if myMps.hasWord:
            myMps.setWordType(wordType, isTerm=False)
        return list(myMps.genWords())

    # Pass 1: mark whitespace.
    self.assertEqual(
        [
            (' ', 'whitespace'),
            ('1', 'Unknown'),
            (' ', 'whitespace'),
            ('12', 'Unknown'),
            (' ', 'whitespace'),
            ('123', 'Unknown'),
            (' ', 'whitespace'),
            # Note: the final ('1234', 'Unknown') is not reported at this stage.
        ],
        markRuns(lambda ch: ch.isspace(), 'whitespace'),
    )
    # Pass 2: now the numbers.
    self.assertEqual(
        [
            (' ', 'whitespace'),
            ('1', 'digit'),
            (' ', 'whitespace'),
            ('12', 'digit'),
            (' ', 'whitespace'),
            ('123', 'digit'),
            (' ', 'whitespace'),
            ('1234', 'digit'),
        ],
        markRuns(lambda ch: ch.isdigit(), 'digit'),
    )