Пример #1
0
    def test_toUnicode_UTF16(self):
        """Exercise toUnicode with UTF-16 encoded XML input.

        For BOM-prefixed little- and big-endian input the reported encoding
        must be 'utf-16le' / 'utf-16be' and the returned text must equal
        TEST_BASE when the declaration is cut, or the full declared document
        when it is kept. Input without a BOM is expected to be reported as
        'utf-8'.
        """
        # (BOM bytes, codec used to encode the fixture, encoding name expected
        # back from toUnicode)
        bom_cases = (
            (codecs.BOM_UTF16_LE, 'UTF-16le', 'utf-16le'),
            (codecs.BOM_UTF16_BE, 'UTF-16be', 'utf-16be'),
        )
        for bom, codec, expected_enc in bom_cases:
            raw = bom + unicode(TEST_W_ENC % 'UTF-16').encode(codec)

            # declaration is cut from the result
            text, detected = toUnicode(raw, True)
            assert detected == expected_enc
            assert text == TEST_BASE

            # declaration is kept in the result
            text, detected = toUnicode(raw, False)
            assert detected == expected_enc
            assert text == TEST_W_ENC % 'UTF-16'

        # Without a BOM the declaration itself is not readable, so the
        # encoding cannot easily be detected; utf-8 is the expected answer.
        raw = unicode(TEST_W_ENC % 'UTF-16').encode('UTF-16le')
        _, detected = toUnicode(raw, True)
        assert detected == 'utf-8'
Пример #2
0
 def test_toUnicode_UTF16(self):
     """Verify toUnicode on UTF-16 documents, with and without a BOM."""

     def check_with_bom(bom, codec, expected_enc):
         # Encode the declared document, prefix the BOM, then convert it
         # once with and once without cutting the XML declaration.
         raw = bom + unicode(TEST_W_ENC % 'UTF-16').encode(codec)
         text, detected = toUnicode(raw, True)
         assert detected == expected_enc
         assert text == TEST_BASE
         text, detected = toUnicode(raw, False)
         assert detected == expected_enc
         assert text == TEST_W_ENC % 'UTF-16'

     # ----- input carrying a byte order mark -----
     check_with_bom(codecs.BOM_UTF16_LE, 'UTF-16le', 'utf-16le')
     check_with_bom(codecs.BOM_UTF16_BE, 'UTF-16be', 'utf-16be')

     # ----- input without a byte order mark -----
     # This case can not easily be handled: with no BOM and an unreadable
     # declaration, utf-8 is the expected result.
     raw = unicode(TEST_W_ENC % 'UTF-16').encode('UTF-16le')
     _, detected = toUnicode(raw, True)
     assert detected == 'utf-8'
Пример #3
0
    def test_toUnicode_utf8(self):
        """Exercise UTF-8 encoded XML input with and without a BOM.

        BOM-prefixed input goes through toUnicode, input without a BOM goes
        through parseXMLDeclaration. Each is checked for a document with and
        without an encoding in its declaration; the reported encoding must
        always be 'utf-8'.
        """

        def verify(convert, raw, uncut):
            # declaration is cut from the result
            text, detected = convert(raw, True)
            assert text == TEST_BASE
            assert detected == 'utf-8'
            # declaration is kept in the result
            text, detected = convert(raw, False)
            assert text == uncut
            assert detected == 'utf-8'

        # ----- with BOM -----
        # declaration names an encoding
        verify(toUnicode,
               codecs.BOM_UTF8 + unicode(TEST_W_ENC % 'UTF-8').encode('UTF-8'),
               TEST_W_ENC % 'UTF-8')
        # declaration names no encoding
        verify(toUnicode,
               codecs.BOM_UTF8 + unicode(TEST_WO_ENC).encode('UTF-8'),
               TEST_WO_ENC)

        # ----- without BOM -----
        # declaration names an encoding
        verify(parseXMLDeclaration,
               unicode(TEST_W_ENC % 'UTF-8').encode('UTF-8'),
               TEST_W_ENC % 'UTF-8')
        # declaration names no encoding
        verify(parseXMLDeclaration,
               unicode(TEST_WO_ENC).encode('UTF-8'),
               TEST_WO_ENC)
Пример #4
0
 def test_toUnicode_utf8(self):
     """Verify conversion of UTF-8 documents, with and without a BOM."""
     # Each row: (converter under test, BOM to prepend or None, source text).
     # BOM-prefixed rows use toUnicode; rows without a BOM use
     # parseXMLDeclaration. Both a declaration with and one without an
     # encoding are covered.
     cases = (
         (toUnicode, codecs.BOM_UTF8, TEST_W_ENC % 'UTF-8'),
         (toUnicode, codecs.BOM_UTF8, TEST_WO_ENC),
         (parseXMLDeclaration, None, TEST_W_ENC % 'UTF-8'),
         (parseXMLDeclaration, None, TEST_WO_ENC),
     )
     for convert, bom, source in cases:
         raw = unicode(source).encode('UTF-8')
         if bom is not None:
             raw = bom + raw

         # declaration is cut from the result
         text, detected = convert(raw, True)
         assert text == TEST_BASE
         assert detected == 'utf-8'

         # declaration is kept, so the source text comes back unchanged
         text, detected = convert(raw, False)
         assert text == source
         assert detected == 'utf-8'
Пример #5
0
def newXMLDocument(data, id=None, uid=None):
    """
    Returns a new XmlDocument object.

    Data will be converted to unicode and a possible XML declaration will be
    removed. Use this method whenever you wish to create a XmlDocument
    manually!

    :param data: XML content. Unicode input is passed to
        parseXMLDeclaration, anything else to toUnicode.
    :param id: Forwarded unchanged as the second XmlDocument argument.
    :param uid: Forwarded unchanged as the third XmlDocument argument.
    :return: The XmlDocument created from the converted data.
    :raises InvalidParameterError: If data is None or has length zero.
    """
    # len(None) would raise a bare TypeError, so missing data is reported the
    # same way as an empty document.
    if data is None or len(data) == 0:
        raise InvalidParameterError("XML document is empty.")
    # Unicode input only needs its declaration removed; any other input has
    # to be decoded to unicode first.
    if isinstance(data, unicode):
        convert = parseXMLDeclaration
    else:
        convert = toUnicode
    data, _ = convert(data, remove_decl=True)
    return XmlDocument(data, id, uid)
Пример #6
0
def newXMLDocument(data, id=None, uid=None):
    """
    Returns a new XmlDocument object.

    Data will be converted to unicode and a possible XML declaration will be
    removed. Use this method whenever you wish to create a XmlDocument
    manually!

    :param data: XML content, either unicode or an encoded string.
    :param id: Passed through to XmlDocument as its second argument.
    :param uid: Passed through to XmlDocument as its third argument.
    :return: A XmlDocument holding the converted data.
    :raises InvalidParameterError: If data is None or empty.
    """
    # Check for data. None is tested first because len(None) raises a
    # TypeError instead of the intended "empty document" error.
    if data is None or len(data) == 0:
        raise InvalidParameterError("XML document is empty.")
    # Convert data to unicode and remove the XML declaration.
    if isinstance(data, unicode):
        # already unicode: only the declaration has to be handled
        data, _ = parseXMLDeclaration(data, remove_decl=True)
    else:
        # not unicode yet: toUnicode converts it and removes the declaration
        data, _ = toUnicode(data, remove_decl=True)
    return XmlDocument(data, id, uid)