示例#1
0
 def test_unicode(self):
     """Check that u'' literals parse to ``unicode`` objects with the right text.

     The first group is parsed without a source encoding; the last case
     passes a UTF-8 encoded literal together with a 'koi8-u' encoding.
     """
     space = self.space
     # (source literal, expected unwrapped value) pairs.
     cases = [
         (repr(u'hello world'), u'hello world'),
         (repr(u'hello\n world'), u'hello\n world'),
         ("u'''hello\\x42 world'''", u'hello\x42 world'),
         ("u'''hello\\u0842 world'''", u'hello\u0842 world'),
     ]
     for literal, expected in cases:
         value = space.unwrap(parsestring.parsestr(space, None, literal))
         assert isinstance(value, unicode)
         assert value == expected

     # Re-encode the koi8-u literal as UTF-8 before handing it to parsestr.
     utf8_literal = "u'\x81'".decode("koi8-u").encode("utf8")
     w_parsed = parsestring.parsestr(space, 'koi8-u', utf8_literal)
     value = space.unwrap(w_parsed)
     assert value == eval("# -*- coding: koi8-u -*-\nu'\x81'")
示例#2
0
 def test_unicode_literals(self):
     """With True as the fourth parsestr argument, check the result types.

     Plain and raw literals must be ``space.w_unicode`` instances; a b''
     literal must be a ``space.w_bytes`` instance.
     """
     space = self.space
     expectations = [
         (repr("hello"), space.w_unicode),
         ("b'hi'", space.w_bytes),
         ("r'hi'", space.w_unicode),
     ]
     for literal, w_expected_type in expectations:
         w_parsed = parsestring.parsestr(space, None, literal, True)
         assert space.isinstance_w(w_parsed, w_expected_type)
示例#3
0
 def test_bytes(self):
     """b'' literals, single- or triple-quoted, unwrap to their contents."""
     space = self.space
     for literal in ("b'hello'", "b'''hello'''"):
         w_parsed = parsestring.parsestr(space, None, literal)
         assert space.unwrap(w_parsed) == "hello"
示例#4
0
 def test_unicode_literals(self):
     """Plain and raw literals parse to unicode; a b'' literal parses to str."""
     space = self.space

     def parses_to(literal, w_type):
         # True when the parsed literal is an instance of the wrapped type.
         w_parsed = parsestring.parsestr(space, None, literal)
         return space.isinstance_w(w_parsed, w_type)

     assert parses_to(repr("hello"), space.w_unicode)
     assert parses_to("b'hi'", space.w_str)
     assert parses_to("r'hi'", space.w_unicode)
示例#5
0
 def test_bytes(self):
     """Both quoting styles of a b'' literal must unwrap to "hello"."""
     space = self.space
     byte_literals = ["b'hello'", "b'''hello'''"]
     for source in byte_literals:
         unwrapped = space.unwrap(parsestring.parsestr(space, None, source))
         assert unwrapped == "hello"
示例#6
0
def build_atom(builder, nb):
    """Build the AST node for an atom and push it onto ``builder``.

    The items for the rule come from ``get_atoms(builder, nb)``.  When the
    first item is a ``TokenObject`` its token kind selects what is pushed:

    * LPAR     -- an empty ``ast.Tuple`` or the enclosed item
    * LSQB     -- an empty ``ast.List`` or the enclosed list node
    * LBRACE   -- an ``ast.Dict`` built from the key/value items
    * NAME     -- an ``ast.Name``
    * NUMBER   -- an ``ast.Const`` holding the evaluated number
    * STRING   -- an ``ast.Const`` holding the parsed (and joined) strings
    * BACKQUOTE -- an ``ast.Backquote``

    Nothing is pushed when the first item is not a ``TokenObject``.

    Raises ``SyntaxError`` for any other leading token.
    """
    atoms = get_atoms(builder, nb)
    top = atoms[0]
    if isinstance(top, TokenObject):
        # assert isinstance(top, TokenObject) # rtyper
        tokens = builder.parser.tokens
        if top.name == tokens['LPAR']:
            if len(atoms) == 2:
                # Only the two parentheses: empty tuple.
                builder.push(ast.Tuple([], top.lineno))
            else:
                builder.push(atoms[1])
        elif top.name == tokens['LSQB']:
            if len(atoms) == 2:
                # Only the two brackets: empty list.
                builder.push(ast.List([], top.lineno))
            else:
                list_node = atoms[1]
                # The list node takes the line number of the opening bracket.
                list_node.lineno = top.lineno
                builder.push(list_node)
        elif top.name == tokens['LBRACE']:
            items = []
            for index in range(1, len(atoms) - 1, 4):
                # a   :   b   ,   c : d
                # ^  +1  +2  +3  +4
                items.append((atoms[index], atoms[index + 2]))
            builder.push(ast.Dict(items, top.lineno))
        elif top.name == tokens['NAME']:
            builder.push(ast.Name(top.get_value(), top.lineno))
        elif top.name == tokens['NUMBER']:
            builder.push(
                ast.Const(builder.eval_number(top.get_value()), top.lineno))
        elif top.name == tokens['STRING']:
            space = builder.space
            if len(atoms) == 1:
                # Single literal: `top` is the only token.
                builder.push(
                    ast.Const(
                        parsestr(space, builder.source_encoding,
                                 top.get_value()), top.lineno))
            else:
                # Several adjacent literals: parse each one and join the
                # results through the object space.
                w_empty = space.wrap('')
                pieces = []
                for token in atoms:
                    assert isinstance(token, TokenObject)
                    pieces.append(
                        parsestr(space, builder.source_encoding,
                                 token.get_value()))
                w_joined = space.call_method(w_empty, 'join',
                                             space.newlist(pieces))
                builder.push(ast.Const(w_joined, top.lineno))
        elif top.name == tokens['BACKQUOTE']:
            builder.push(ast.Backquote(atoms[1], atoms[1].lineno))
        else:
            raise SyntaxError("unexpected tokens", top.lineno, top.col)
示例#7
0
def build_atom(builder, nb):
    """Push the AST node for an atom onto ``builder``.

    ``get_atoms(builder, nb)`` supplies the items of the rule.  If the first
    item is a ``TokenObject``, its token kind decides which node is pushed
    (tuple, list, dict, name, number constant, string constant or
    backquote expression).  If it is not a ``TokenObject`` nothing is pushed.

    Raises ``SyntaxError("unexpected tokens", ...)`` when the leading token
    is of none of the handled kinds.
    """
    atoms = get_atoms(builder, nb)
    top = atoms[0]
    if isinstance(top, TokenObject):
        # assert isinstance(top, TokenObject) # rtyper
        token_ids = builder.parser.tokens
        kind = top.name
        if kind == token_ids["LPAR"]:
            if len(atoms) == 2:
                # "(" immediately followed by ")": empty tuple.
                builder.push(ast.Tuple([], top.lineno))
            else:
                builder.push(atoms[1])
        elif kind == token_ids["LSQB"]:
            if len(atoms) == 2:
                # "[" immediately followed by "]": empty list.
                builder.push(ast.List([], top.lineno))
            else:
                list_node = atoms[1]
                # Report the list at the line of its opening bracket.
                list_node.lineno = top.lineno
                builder.push(list_node)
        elif kind == token_ids["LBRACE"]:
            items = []
            for index in range(1, len(atoms) - 1, 4):
                # a   :   b   ,   c : d
                # ^  +1  +2  +3  +4
                items.append((atoms[index], atoms[index + 2]))
            builder.push(ast.Dict(items, top.lineno))
        elif kind == token_ids["NAME"]:
            builder.push(ast.Name(top.get_value(), top.lineno))
        elif kind == token_ids["NUMBER"]:
            builder.push(ast.Const(builder.eval_number(top.get_value()), top.lineno))
        elif kind == token_ids["STRING"]:
            space = builder.space
            encoding = builder.source_encoding
            if len(atoms) == 1:
                # A lone literal: `top` is that literal's token.
                builder.push(ast.Const(parsestr(space, encoding, top.get_value()), top.lineno))
            else:
                # Adjacent literals are parsed one by one and concatenated
                # by calling ``join`` on a wrapped empty string.
                w_empty = space.wrap("")
                parsed = []
                for token in atoms:
                    assert isinstance(token, TokenObject)
                    parsed.append(parsestr(space, encoding, token.get_value()))
                w_joined = space.call_method(w_empty, "join", space.newlist(parsed))
                builder.push(ast.Const(w_joined, top.lineno))
        elif kind == token_ids["BACKQUOTE"]:
            builder.push(ast.Backquote(atoms[1], atoms[1].lineno))
        else:
            raise SyntaxError("unexpected tokens", top.lineno, top.col)
示例#8
0
def decode_string_literal(space, s, w_encoding=None):
    """Parse the string-literal source ``s`` with ``parsestr``.

    ``w_encoding`` is an optional wrapped encoding name.  When it is
    ``None`` (the default) or false in ``space``, ``parsestr`` is called
    with an encoding of ``None``; otherwise with ``space.str_w(w_encoding)``.

    Returns whatever ``parsestr`` returns.
    """
    from pypy.interpreter.pyparser.parsestring import parsestr
    encoding = None
    # Guard the interp-level default: do not hand a bare None to the space.
    if w_encoding is not None and space.is_true(w_encoding):
        encoding = space.str_w(w_encoding)
    return parsestr(space, encoding, s)
示例#9
0
def decode_string_literal(space, s, w_encoding=None):
    """Return ``parsestr(space, encoding, s)`` for the literal source ``s``.

    ``encoding`` is ``space.str_w(w_encoding)`` when ``w_encoding`` is given
    and true in ``space``; it is ``None`` when ``w_encoding`` is omitted,
    ``None``, or false.
    """
    from pypy.interpreter.pyparser.parsestring import parsestr
    # The signature's default is a plain None, so test for it before asking
    # the object space for the truth value.
    if w_encoding is None or not space.is_true(w_encoding):
        encoding = None
    else:
        encoding = space.str_w(w_encoding)
    return parsestr(space, encoding, s)
示例#10
0
 def test_simple_enc_roundtrip(self):
     """A koi8-u literal with a ``\\t`` escape parses to what eval() yields."""
     koi8_literal = "'\x81\\t'"
     # parsestr is given the UTF-8 form plus the declared encoding.
     utf8_literal = koi8_literal.decode("koi8-u").encode("utf8")
     w_parsed = parsestring.parsestr(self.space, 'koi8-u', utf8_literal)
     actual = self.space.unwrap(w_parsed)
     assert actual == eval("# -*- coding: koi8-u -*-\n'\x81\\t'")
示例#11
0
 def test_simple_enc_roundtrip(self):
     """Parse a UTF-8 re-encoded koi8-u literal and compare with a u'' eval."""
     space = self.space
     source = "'\x81\\t'".decode("koi8-u").encode("utf8")
     unwrapped = space.unwrap(parsestring.parsestr(space, 'koi8-u', source))
     # The expected value comes from evaluating the u''-prefixed literal.
     assert unwrapped == eval("# -*- coding: koi8-u -*-\nu'\x81\\t'")
示例#12
0
 def test_simple_enc_roundtrip(self):
     """A one-character koi8-u literal parses to what eval() gives for it."""
     # NOTE: a py.test.skip("crashes in app_codecs, but when cheating using
     # .encode at interp-level passes?!") call is left disabled here.
     raw_literal = "'\x81'"
     recoded = raw_literal.decode("koi8-u").encode("utf8")
     w_value = parsestring.parsestr(self.space, 'koi8-u', recoded)
     value = self.space.unwrap(w_value)
     assert value == eval("# -*- coding: koi8-u -*-\n'\x81'")
示例#13
0
 def parse_and_compare(self, literal, value):
     """Parse ``literal`` without a source encoding and check the result.

     A ``str`` ``value`` requires a ``space.w_str`` result whose ``str_w``
     equals it; a ``unicode`` ``value`` requires a ``space.w_unicode``
     result whose ``unicode_w`` equals it.  Any other ``value`` type fails.
     """
     space = self.space
     w_parsed = parsestring.parsestr(space, None, literal)
     if isinstance(value, str):
         assert space.type(w_parsed) == space.w_str
         assert space.str_w(w_parsed) == value
         return
     if isinstance(value, unicode):
         assert space.type(w_parsed) == space.w_unicode
         assert space.unicode_w(w_parsed) == value
         return
     # Neither str nor unicode: the helper was called with a bad expectation.
     assert False
示例#14
0
 def parse_and_compare(self, literal, value, encoding=None):
     """Parse ``literal`` with ``encoding`` and compare type and contents.

     ``value`` selects the expectation: ``str`` means a ``space.w_bytes``
     object read back with ``bytes_w``; ``unicode`` means a
     ``space.w_unicode`` object read back with ``unicode_w``.  Other types
     make the helper fail.
     """
     space = self.space
     w_result = parsestring.parsestr(space, encoding, literal)
     expects_bytes = isinstance(value, str)
     expects_unicode = not expects_bytes and isinstance(value, unicode)
     if expects_bytes:
         assert space.type(w_result) == space.w_bytes
         assert space.bytes_w(w_result) == value
     elif expects_unicode:
         assert space.type(w_result) == space.w_unicode
         assert space.unicode_w(w_result) == value
     else:
         assert False
示例#15
0
 def parse_and_compare(self, literal, value, encoding=None):
     """Check that parsing ``literal`` yields an object matching ``value``.

     For a ``str`` ``value`` the result must be of type ``space.w_bytes``
     and ``str_w`` must return ``value``.  For a ``unicode`` ``value`` the
     result must be of type ``space.w_unicode`` and its ``utf8_w`` bytes,
     decoded as UTF-8, must equal ``value``.  Anything else fails.
     """
     space = self.space
     w_obj = parsestring.parsestr(space, encoding, literal)
     if isinstance(value, str):
         assert space.type(w_obj) == space.w_bytes
         assert space.str_w(w_obj) == value
         return
     if not isinstance(value, unicode):
         assert False
     assert space.type(w_obj) == space.w_unicode
     decoded = space.utf8_w(w_obj).decode('utf8')
     assert decoded == value
示例#16
0
    def test_unicode(self):
        """Unprefixed literals parse to unicode values; then a koi8-u case."""
        for text in ('hello world', 'hello\n world'):
            self.parse_and_compare(repr(text), unicode(text))

        escaped_cases = [
            ("'''hello\\x42 world'''", u'hello\x42 world'),
            ("'''hello\\u0842 world'''", u'hello\u0842 world'),
        ]
        for literal, expected in escaped_cases:
            self.parse_and_compare(literal, expected)

        # Re-encode as UTF-8, then drop the leading "u" prefix with [1:].
        utf8_literal = "u'\x81'".decode("koi8-u").encode("utf8")[1:]
        w_parsed = parsestring.parsestr(self.space, 'koi8-u', utf8_literal)
        decoded = w_parsed._utf8.decode('utf8')
        assert decoded == eval("# -*- coding: koi8-u -*-\nu'\x81'")
示例#17
0
    def test_unicode(self):
        """u'' literals round-trip through parse_and_compare and a koi8-u case."""
        space = self.space
        plain_texts = (u"hello world", u"hello\n world")
        for text in plain_texts:
            self.parse_and_compare(repr(text), text)

        for literal, expected in (
            ("u'''hello\\x42 world'''", u"hello\x42 world"),
            ("u'''hello\\u0842 world'''", u"hello\u0842 world"),
        ):
            self.parse_and_compare(literal, expected)

        # The literal is passed as UTF-8 bytes with its declared encoding.
        encoded = "u'\x81'".decode("koi8-u").encode("utf8")
        unwrapped = space.unwrap(parsestring.parsestr(space, "koi8-u", encoded))
        assert unwrapped == eval("# -*- coding: koi8-u -*-\nu'\x81'")
示例#18
0
    def test_unicode(self):
        """Check u'' literal parsing, ending with a koi8-u encoded literal."""
        space = self.space
        # (literal source, expected unicode value)
        checks = [
            (repr(u'hello world'), u'hello world'),
            (repr(u'hello\n world'), u'hello\n world'),
            ("u'''hello\\x42 world'''", u'hello\x42 world'),
            ("u'''hello\\u0842 world'''", u'hello\u0842 world'),
        ]
        for source, wanted in checks:
            self.parse_and_compare(source, wanted)

        koi8_source = "u'\x81'"
        utf8_source = koi8_source.decode("koi8-u").encode("utf8")
        w_value = parsestring.parsestr(space, 'koi8-u', utf8_source)
        value = space.unwrap(w_value)
        assert value == eval("# -*- coding: koi8-u -*-\nu'\x81'")
示例#19
0
    def test_simple(self):
        """Plain string literals: escapes decode, malformed \\x raises ValueError."""
        space = self.space

        def check(literal, expected):
            w_parsed = parsestring.parsestr(space, None, literal)
            assert space.str_w(w_parsed) == expected

        # Ordinary text, hex escapes and octal escapes.
        check(repr('hello world'), 'hello world')
        check(repr('hello\n world'), 'hello\n world')
        check("'''hello\\x42 world'''", 'hello\x42 world')
        check(r'"\0"', chr(0))
        check(r'"\07"', chr(7))
        check(r'"\123"', chr(int('123', 8)))

        # \x escapes without two hex digits must be rejected.
        for malformed in (r'"\x"', r'"\x7"', r'"\x7g"'):
            space.raises_w(space.w_ValueError, parsestring.parsestr, space,
                           None, malformed)

        # Mixed-case hex digits and escaped quote characters.
        check(r'"\xfF"', chr(0xFF))
        check(r'"\""', '"')
        check(r"'\''", "'")
示例#20
0
    def test_simple(self):
        """Walk a table of literals: expected values or an expected ValueError."""
        space = self.space
        must_fail = object()  # marks literals that parsestr has to reject
        table = [
            (repr('hello world'), 'hello world'),
            (repr('hello\n world'), 'hello\n world'),
            ("'''hello\\x42 world'''", 'hello\x42 world'),
            (r'"\0"', chr(0)),
            (r'"\07"', chr(7)),
            (r'"\123"', chr(int('123', 8))),
            (r'"\x"', must_fail),
            (r'"\x7"', must_fail),
            (r'"\x7g"', must_fail),
            (r'"\xfF"', chr(0xFF)),
            (r'"\""', '"'),
            (r"'\''", "'"),
        ]
        for source, outcome in table:
            if outcome is must_fail:
                space.raises_w(space.w_ValueError, parsestring.parsestr, space, None, source)
                continue
            w_value = parsestring.parsestr(space, None, source)
            assert space.str_w(w_value) == outcome
示例#21
0
def string_parse_literal(astbuilder, atom_node):
    """Turn the string pieces under ``atom_node`` into a single AST node.

    Each child of ``atom_node`` is parsed with ``parsestring.parsestr``.
    A ``parsestring.W_FString`` result is handled by ``parse_f_string``;
    any other result is added with ``add_constant_string``.

    Returns the only collected piece when no f-string was seen and exactly
    one piece was collected; otherwise the result of
    ``f_string_to_ast_node``.

    Unicode, value and syntax errors raised while processing a child are
    re-raised through ``astbuilder.error`` with that child as position.
    A bytes piece among several pieces is reported the same way, against
    ``atom_node``.
    """
    space = astbuilder.space
    encoding = astbuilder.compile_info.encoding
    joined_pieces = []
    fmode = False
    for i in range(atom_node.num_children()):
        child = atom_node.get_child(i)
        try:
            w_next = parsestring.parsestr(space, encoding, child.get_value(),
                                          child)
            if not isinstance(w_next, parsestring.W_FString):
                add_constant_string(astbuilder, joined_pieces, w_next,
                                    atom_node)
            else:
                parse_f_string(astbuilder, joined_pieces, w_next, atom_node)
                fmode = True

        except error.OperationError as e:
            if e.match(space, space.w_UnicodeError):
                kind = '(unicode error) '
            elif e.match(space, space.w_ValueError):
                kind = '(value error) '
            elif e.match(space, space.w_SyntaxError):
                kind = ''
            else:
                raise
            # Unicode/ValueError/SyntaxError (without position information) in
            # literal: turn into SyntaxError with position information
            e.normalize_exception(space)
            errmsg = space.text_w(space.str(e.get_w_value(space)))
            raise astbuilder.error('%s%s' % (kind, errmsg), child)

    if not fmode and len(joined_pieces) == 1:  # <= the common path
        return joined_pieces[0]  # ast.Str, Bytes or FormattedValue

    # with more than one piece, it is a combination of Str and
    # FormattedValue pieces---if there is a Bytes, then we got
    # an invalid mixture of bytes and unicode literals
    for node in joined_pieces:
        if isinstance(node, ast.Bytes):
            # Use ``raise`` as the error path above does, so the check still
            # stops here if ``astbuilder.error`` returns the exception
            # instead of raising it itself.
            raise astbuilder.error("cannot mix bytes and nonbytes literals",
                                   atom_node)
    assert fmode
    return f_string_to_ast_node(astbuilder, joined_pieces, atom_node)
示例#22
0
 def test_raw_unicode_literals(self):
     """A raw literal holding backslash-u must parse to a 2-item result."""
     space = self.space
     # The backslash is escaped explicitly: the source text handed to
     # parsestr is still r'\u', without depending on "\u" being left alone
     # as an unrecognised escape in the enclosing literal.
     w_ret = parsestring.parsestr(space, None, "r'\\u'")
     assert space.int_w(space.len(w_ret)) == 2
示例#23
0
 def test_multiline_unicode_strings_with_backslash(self):
     """A triple-quoted literal holding only backslash-newline parses to ''."""
     space = self.space
     # Pieces: opening quotes, backslash, newline, closing quotes.
     literal = ''.join(['"""', '\\', '\n', '"""'])
     w_parsed = parsestring.parsestr(space, None, literal)
     assert space.str_w(w_parsed) == ''
示例#24
0
 def test_bug1(self):
     """Non-ASCII bytes plus a ``\\n`` escape in a utf8-encoded literal."""
     space = self.space
     # Shared body: "x ", the two bytes 0xc3 0xa9, and a trailing space.
     body = 'x ' + chr(0xc3) + chr(0xa9) + ' '
     # The literal ends with a backslash (chr(92)) followed by "n".
     literal = "'" + body + chr(92) + 'n' + "'"
     w_parsed = parsestring.parsestr(space, 'utf8', literal)
     assert space.str_w(w_parsed) == body + '\n'
示例#25
0
 def test_multiline_unicode_strings_with_backslash(self):
     """Backslash-newline inside triple quotes leaves an empty string."""
     quotes = '"""'
     source = quotes + '\\' + '\n' + quotes
     w_value = parsestring.parsestr(self.space, None, source)
     assert self.space.str_w(w_value) == ''
示例#26
0
 def test_raw_unicode_literals(self):
     """Parsing the raw literal r'\\u' must give a result of length 2."""
     space = self.space
     # Spell the backslash as "\\" so the literal text does not rely on
     # "\u" being an unrecognised escape in the surrounding string.
     raw_literal = "r'\\u'"
     w_ret = parsestring.parsestr(space, None, raw_literal)
     assert space.int_w(space.len(w_ret)) == 2
示例#27
0
 def test_bug1(self):
     """Parse a utf8 literal with high bytes and check the decoded ``\\n``."""
     space = self.space
     expected_chars = ['x', ' ', chr(0xc3), chr(0xa9), ' ', '\n']
     # Same characters between quotes, with the newline written as the
     # two-character escape: backslash (chr(92)) and "n".
     literal_chars = ["'"] + expected_chars[:-1] + [chr(92), 'n', "'"]
     w_parsed = parsestring.parsestr(space, 'utf8', ''.join(literal_chars))
     assert space.str_w(w_parsed) == ''.join(expected_chars)
示例#28
0
 def test_bug1(self):
     """High bytes survive parsing and the trailing escape becomes a newline."""
     space = self.space
     high_bytes = chr(0xC3) + chr(0xA9)
     prefix = "x " + high_bytes + " "
     # chr(92) is the backslash that starts the "\n" escape in the literal.
     source = "".join(["'", prefix, chr(92), "n", "'"])
     w_result = parsestring.parsestr(space, "utf8", source)
     assert space.str_w(w_result) == prefix + "\n"