Пример #1
0
 def test_03_00(self):
     """TestMultiPassStringMarker.test_03_00(): C++ comment using removeSetReplaceClear().

     A ``//`` comment ending at a newline is removed, typed as
     'C++ comment' and replaced by a single space with one call to
     ``removeSetReplaceClear()``. Afterwards the character stream must be
     a space followed by a newline, while the word stream still reports
     the original comment text with its type.
     """
     source = u'// C\n\n'
     mps = MultiPassString.MultiPassString(io.StringIO(source))
     # Single pass over the characters: locate the comment and collapse it.
     for ch in mps.genChars():
         if ch == '\n' and mps.hasWord:
             # Newline while a word is marked: remove the marked word,
             # record its type and put a single space in its place.
             mps.removeSetReplaceClear(True, 'C++ comment', ' ')
             continue
         if ch == '/' and mps.prevChar != '/':
             # A slash not preceded by a slash: possible comment start.
             mps.setMarker()
             continue
         if mps.wordLength == 1 and ch != '/':
             # The marked word is one character long and the current
             # character is not a slash, so drop the marker.
             mps.clearMarker()
     # The character stream reflects the replacement...
     self.assertEqual(' \n', ''.join(list(mps.genChars())))
     # ...whereas the word stream keeps the original text and its type.
     expectedWords = [
         ('// C\n', 'C++ comment'),
         ('\n', 'Unknown'),
     ]
     self.assertEqual(expectedWords, list(mps.genWords()))
Пример #2
0
 def test_01(self):
     """TestMultiPassStringMarker.test_01(): Empty string.

     An empty input must produce no characters and no words, and must
     leave ``currentString`` empty, ``idxChar`` at zero and ``idxTypeMap``
     empty.
     """
     emptyMps = MultiPassString.MultiPassString(io.StringIO(u''))
     # Exhaust the character generator first, then the word generator.
     self.assertEqual([], list(emptyMps.genChars()))
     self.assertEqual([], list(emptyMps.genWords()))
     # Internal state after both generators have been consumed.
     self.assertEqual([], emptyMps.currentString)
     self.assertEqual(0, emptyMps.idxChar)
     self.assertEqual({}, emptyMps.idxTypeMap)
Пример #3
0
    def __init__(self, theFileObj=None, theFileId=None, theDiagnostic=None):
        """Constructor.

        Forwards all three arguments unchanged to the superclass constructor
        and then wraps ``theFileObj`` in a ``MultiPassString`` that is stored
        as ``self._mps``.

        :param theFileObj: File object.
        :type theFileObj: ``_io.TextIOWrapper``

        :param theFileId: File ID such as the path.
        :type theFileId: ``str``

        :param theDiagnostic: A diagnostic for processing messages.
        :type theDiagnostic: ``NoneType``

        :returns: ``NoneType``

        NOTE(review): ``theFileObj`` defaults to ``None`` and is handed
        straight to ``MultiPassString``; whether ``None`` is accepted there
        is not visible from here -- confirm.

        NOTE(review): the documented type of ``theDiagnostic`` is
        ``NoneType``; presumably a diagnostic object may also be supplied --
        verify against the superclass.
        """
        # Let the superclass handle the file object, file ID and diagnostic.
        super(ItuToTokens, self).__init__(theFileObj, theFileId, theDiagnostic)
        # Multi-pass view over the same file object.
        self._mps = MultiPassString.MultiPassString(theFileObj)
Пример #4
0
 def test_05(self):
     """TestMultiPassStringMarker.test_05(): Spaces and numbers, single pass.

     Walks the input once, tracking whether the current run consists of
     whitespace or of digits. Whenever the character class changes and a
     word is marked, the run so far is given the previous type and a new
     marker is set. The final run is typed after the loop. The resulting
     word stream must alternate whitespace and digit words.
     """
     myStr = u""" 1  12   123    1234"""
     myMps = MultiPassString.MultiPassString(io.StringIO(myStr))
     # Type of the run currently being accumulated.
     cType = MultiPassString.MultiPassString.UNKNOWN_TOKEN_TYPE
     myMps.setMarker()
     for c in myMps.genChars():
         if c.isspace():
             newType = 'whitespace'
         elif c.isdigit():
             newType = 'digit'
         else:
             # Neither whitespace nor digit: the current run is untouched.
             continue
         if cType != newType and myMps.hasWord:
             # Character class changed: type the run seen so far with the
             # previous class and start marking the next run.
             myMps.setWordType(cType, isTerm=False)
             myMps.setMarker()
         cType = newType
     # Trailing tokens
     if myMps.hasWord:
         myMps.setWordType(cType, isTerm=False)
     wordS = list(myMps.genWords())
     self.assertEqual(
         [
             (' ', 'whitespace'),
             ('1', 'digit'),
             ('  ', 'whitespace'),
             ('12', 'digit'),
             ('   ', 'whitespace'),
             ('123', 'digit'),
             ('    ', 'whitespace'),
             ('1234', 'digit'),
         ],
         wordS,
     )
Пример #5
0
    def test_03(self):
        """TestMultiPassStringMarker.test_03(): C++ and line continuation using removeSetReplaceClear()."""
        myStr = u"""
// Some comment \\
and \\
the rest

"""
        myMps = MultiPassString.MultiPassString(io.StringIO(myStr))
        o = []
        for c in myMps.genChars():
            if c == '\n' and myMps.prevChar == '\\':
                myMps.removeMarkedWord(isTerm=True)
                # Note we don't myMps.setWordType('line-continuation')
                # as this is interlaced with the C++ comment.
            myMps.setMarker()
        # Check the result of this pass
        o = [c for c in myMps.genChars()]
        self.assertEqual('\n// Some comment and the rest\n\n', ''.join(o))
        # Mark comment
        for c in myMps.genChars():
            if c == '\n' and myMps.hasWord:
                # Replace by a single space
                # remove and mark
                myMps.removeSetReplaceClear(True, 'C++ comment', ' ')
            elif c == '/' and myMps.prevChar != '/':
                # Start of comment
                myMps.setMarker()
            elif myMps.wordLength == 1 and c != '/':
                myMps.clearMarker()
        # Check the result of this pass
        o = [c for c in myMps.genChars()]
        self.assertEqual('\n \n', ''.join(o))
        n = [c for c in myMps.genWords()]
        self.assertEqual(
            [
                ('\n', 'Unknown'),
                ('// Some comment \\\nand \\\nthe rest\n', 'C++ comment'),
                #                ('\n', 'Unknown'),
            ],
            n,
        )
Пример #6
0
 def __init__(self, theFileObj=None, theFileId=None, theDiagnostic=None):
     """Constructor.

     Forwards ``theFileObj``, ``theFileId`` and ``theDiagnostic`` unchanged
     to the superclass constructor, then wraps ``theFileObj`` in a
     ``MultiPassString`` stored as ``self._mps``.

     NOTE(review): ``theFileObj`` defaults to ``None`` and is passed
     straight to ``MultiPassString``; whether ``None`` is accepted there is
     not visible from here -- confirm.
     """
     # Let the superclass handle the file object, file ID and diagnostic.
     super(ItuToTokens, self).__init__(theFileObj, theFileId, theDiagnostic)
     # Multi-pass view over the same file object.
     self._mps = MultiPassString.MultiPassString(theFileObj)
Пример #7
0
    def test_02(self):
        """TestMultiPassStringMarker.test_02(): C++ and line continuation using individual functions."""
        myStr = u"""
// Some comment \\
and \\
the rest

"""
        myMps = MultiPassString.MultiPassString(io.StringIO(myStr))
        o = []
        for c in myMps.genChars():
            if c == '\n' and myMps.prevChar == '\\':
                myMps.removeMarkedWord(isTerm=True)
                # Note we don't myMps.setWordType('line-continuation')
                # as this is interlaced with the C++ comment.
            #prevChar = c
            myMps.setMarker()
        # Check the result of this pass
        o = [c for c in myMps.genChars()]
        #print 'Line continuation removed:'
        #print ''.join(o)
        #print 'Line continuation removed currentString:'
        #print myMps.currentString
        self.assertEqual('\n// Some comment and the rest\n\n', ''.join(o))
        #print 'idxTypeMap 0'
        #print myMps.idxTypeMap
        # Mark comment
        for c in myMps.genChars():
            if c == '\n' and myMps.hasWord:
                # Replace by a single space
                # remove and mark
                myMps.removeMarkedWord(isTerm=True)
                #print 'idxTypeMap 1'
                #print myMps.idxTypeMap
                myMps.setWordType('C++ comment', isTerm=True)
                myMps.setAtMarker(' ')
                myMps.clearMarker()
            elif c == '/' and myMps.prevChar != '/':
                # Start of comment
                myMps.setMarker()
            elif myMps.wordLength == 1 and c != '/':
                myMps.clearMarker()
        # Check the result of this pass
        o = [c for c in myMps.genChars()]
        #print 'Comment removed:'
        #print o
        #print 'Line continuation removed currentString:'
        #print myMps.currentString
        self.assertEqual('\n \n', ''.join(o))
        n = [c for c in myMps.genWords()]
        #print 'idxTypeMap'
        #print myMps.idxTypeMap
        #        print('Words')
        #        print(n)
        self.assertEqual(
            [
                ('\n', 'Unknown'),
                ('// Some comment \\\nand \\\nthe rest\n', 'C++ comment'),
                #                ('\n', 'Unknown'),
            ],
            n,
        )
Пример #8
0
 def test_04(self):
     """TestMultiPassStringMarker.test_04(): Spaces and numbers, multiple passes.

     The first pass types every run of whitespace as 'whitespace' and
     checks the intermediate word stream. The second pass types every run
     of digits as 'digit'. After both passes the word stream must
     alternate whitespace and digit words.
     """
     myStr = u""" 1  12   123    1234"""
     myMps = MultiPassString.MultiPassString(io.StringIO(myStr))
     # Mark whitespace
     myMps.clearMarker()
     for c in myMps.genChars():
         if c.isspace() and not myMps.hasWord:
             # Whitespace character
             myMps.setMarker()
         elif not c.isspace():
             # Stop whitespace
             if myMps.hasWord:
                 myMps.setWordType('whitespace', isTerm=False)
             myMps.clearMarker()
     # Trailing whitespace.
     if myMps.hasWord:
         myMps.setWordType('whitespace', isTerm=False)
     wordS = list(myMps.genWords())
     # NOTE: the trailing '1234' is not expected as an 'Unknown' word
     # after this pass.
     self.assertEqual(
         [
             (' ', 'whitespace'),
             ('1', 'Unknown'),
             ('  ', 'whitespace'),
             ('12', 'Unknown'),
             ('   ', 'whitespace'),
             ('123', 'Unknown'),
             ('    ', 'whitespace'),
         ],
         wordS,
     )
     # Now numbers
     myMps.clearMarker()
     for c in myMps.genChars():
         if c.isdigit() and not myMps.hasWord:
             # Digit character
             myMps.setMarker()
         elif not c.isdigit():
             # Non-digit
             if myMps.hasWord:
                 myMps.setWordType('digit', isTerm=False)
             myMps.clearMarker()
     # Trailing digits.
     if myMps.hasWord:
         myMps.setWordType('digit', isTerm=False)
     wordS = list(myMps.genWords())
     self.assertEqual(
         [
             (' ', 'whitespace'),
             ('1', 'digit'),
             ('  ', 'whitespace'),
             ('12', 'digit'),
             ('   ', 'whitespace'),
             ('123', 'digit'),
             ('    ', 'whitespace'),
             ('1234', 'digit'),
         ],
         wordS,
     )