def test_complex_multi(self):
    inp_text = """
        (2-1j)(-2-1j)
        (.2-1j) (-.2-1j)
        (2.-1j) (-2.-1j)
        (2.0-1j) (-2.0-1j)
        (2-.1j) (2-1.j)
        (2-0.1j) (2-1.0j)
        """
    stream = self.lexx.tokenize(inp_text)
    self.stream = Sanitizer().whitespace(stream)
    self.stream = list(self.stream)
    res = [
        ("(2-1j)", "COMPLEX_NUM"),
        ("(-2-1j)", "COMPLEX_NUM"),
        ("(.2-1j)", "COMPLEX_NUM"),
        ("(-.2-1j)", "COMPLEX_NUM"),
        ("(2.-1j)", "COMPLEX_NUM"),
        ("(-2.-1j)", "COMPLEX_NUM"),
        ("(2.0-1j)", "COMPLEX_NUM"),
        ("(-2.0-1j)", "COMPLEX_NUM"),
        ("(2-.1j)", "COMPLEX_NUM"),
        ("(2-1.j)", "COMPLEX_NUM"),
        ("(2-0.1j)", "COMPLEX_NUM"),
        ("(2-1.0j)", "COMPLEX_NUM"),
    ]
    self.assertEqual(res, self.stream)
def test_multiline_comments(self):
    inp_text = """
/*
 test
 // test
 
 */
(*
 test
 // test
 
 *)
"""
    stream = self.lexx.tokenize(inp_text)
    self.stream = Sanitizer().whitespace(stream)
    self.stream = list(self.stream)
    res = [
        (
            "/*\n test\n // test\n \n */",
            "BLOCK_COMMENT",
        ),
        (
            "(*\n test\n // test\n \n *)",
            "BLOCK_ROUND_COMMENT",
        ),
    ]
    self.assertEqual(res, self.stream)
def token_parse_text(self, inp_text):
    stream = self.lexx.tokenize(inp_text)
    self.lxtok = LexerTokens(self.lexx.tokens)
    stream = list(Sanitizer().whitespace(stream, keep=[]))
    pars = Parser()
    pars.set_input(stream)
    return pars
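# A hedged usage sketch (assumed, not part of the original suite): the helper
# above returns a Parser already fed with a whitespace-stripped token stream,
# so a caller would presumably build productions against it much like the
# example script at the end of this file, e.g.
#
#   pars = self.token_parse_text("1 + 2")
#   p_plus = pars.Production("plus", Terminal(typ=self.lxtok.PLUS))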
def test_int_unit(self):
    inp_text = """
        -112
        +110
        110
        """
    stream = self.lexx.tokenize(inp_text)
    self.stream = Sanitizer().whitespace(stream)
    res = list(map(lambda x: int(x[0]), self.stream))
    self.assertEqual(res, [-112, +110, 110])
def test_words(self):
    inp_text = """
        a quick brown fox jumps far away
        """
    stream = self.lexx.tokenize(inp_text)
    self.stream = Sanitizer().whitespace(stream)
    res = list(map(lambda x: str(x[0]), self.stream))
    self.assertEqual(
        res,
        "a quick brown fox jumps far away".split(),
    )
def test_complex(self):
    cmplx = -2 + 1j
    inp_text = str(cmplx)
    stream = self.lexx.tokenize(inp_text)
    self.stream = Sanitizer().whitespace(stream)
    self.stream = list(self.stream)
    res = [("(-2+1j)", "COMPLEX_NUM")]
    self.assertEqual(res, self.stream)
    self.assertEqual(complex(res[0][0]), cmplx)
    print("found", cmplx)
def test_float(self):
    inp_text = """
        0. +0.
        .0 +.0
        0.0 +0.1
        0.0e-1 +0.0e-1
        0.0e1 .0e1 -.0e1
        """
    stream = self.lexx.tokenize(inp_text)
    self.stream = Sanitizer().whitespace(stream)
    res = list(map(lambda x: float(x[0]), self.stream))
    self.assertEqual(
        res,
        [
            0.0, +0.0,
            0.0, +0.0,
            0.0, +0.1,
            0.0e-1, +0.0e-1,
            0.0e1, 0.0e1, -0.0e1,
        ],
    )
def test_empty_comments(self):
    inp_text = """
        /**/
        (**)
        # python comment
        """
    stream = self.lexx.tokenize(inp_text)
    self.stream = Sanitizer().whitespace(stream)
    self.stream = list(self.stream)
    res = [
        ("/**/", "BLOCK_COMMENT"),
        ("(**)", "BLOCK_ROUND_COMMENT"),
        ("# python comment", "EOL_COMMENT_PY"),
    ]
    self.assertEqual(res, self.stream)
def test_multiline_triple_quoted(self):
    inp_text = """
\"\"\"
 pythonic triple
 quoted multiline
 comment
 text with 'single' \t and "double" text inside
\"\"\"
"""
    stream = self.lexx.tokenize(inp_text)
    self.stream = Sanitizer().whitespace(stream)
    self.stream = list(self.stream)
    res = [(
        '"""\n pythonic triple\n quoted multiline\n comment\n text with \'single\' \t and "double" text inside\n"""',
        "TRIPLEQUOTED",
    )]
    self.assertEqual(res, self.stream)
def test_other_base_numbers(self):
    inp_text = """
        0x1234 0b1011 0o678
        0X1234 0B1011 0O678
        """
    stream = self.lexx.tokenize(inp_text)
    self.stream = Sanitizer().whitespace(stream)
    self.stream = list(self.stream)
    res = [
        ("0x1234", "HEXNUM"),
        ("0b1011", "BINNUM"),
        ("0o678", "OCTNUM"),
        ("0X1234", "HEXNUM"),
        ("0B1011", "BINNUM"),
        ("0O678", "OCTNUM"),
    ]
    self.assertEqual(res, self.stream)
def test_quoted_text(self):
    inp_text = """
        'single' 'single\\nmultiline' 'single\\'escaped'
        "double" "double\\multiline" "double\\"escaped"
        """
    stream = self.lexx.tokenize(inp_text)
    self.stream = Sanitizer().whitespace(stream)
    self.stream = list(self.stream)
    res = [
        ("'single'", "QUOTED"),
        ("'single\\nmultiline'", "QUOTED"),
        ("'single\\'escaped'", "QUOTED"),
        ('"double"', "DBLQUOTED"),
        ('"double\\multiline"', "DBLQUOTED"),
        ('"double\\"escaped"', "DBLQUOTED"),
    ]
    self.assertEqual(res, self.stream)
    (2-0.1j) (2-1.0j)
    """

print(inp_text)

tokens = RuleBuilder().add_all().build()
alltokens = Tokens().extend(tokens)

lexx = Lexer(alltokens, debug=not True, debugtime=True)

stream = lexx.tokenize(inp_text)

print("---")
print("---", "1 to 1 scanning result")
print("---")

for token in stream:
    print(token)

# remove the whitespace
stream = Sanitizer().whitespace(stream)

print("---")
print("---", "without whitespace")
print("---")

for token in stream:
    print(token)
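# A minimal sketch (assumed, not part of the original script): the unit tests
# above compare sanitized tokens against (text, type) tuples, so the stream can
# also be re-tokenized and materialized into a list for programmatic checks.
sanitized = list(Sanitizer().whitespace(lexx.tokenize(inp_text)))
print("---")
print("---", "materialized (text, type) pairs")
print("---")
print(sanitized[:3])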
from pyparsesynt.repr import ReprBase
from pyparsesynt.token import Token, TokenStream
from pyparsesynt.rule import Production, Call, Terminal, And, Or, Not, Optional, Repeat

tokens = RuleBuilder().add_all().build()
alltokens = Tokens().extend(tokens)

lexx = Lexer(alltokens, debug=not True, debugtime=False)

inp_text = """1 0 - + 7 8 seven - 1 4 + 5 + 6 six"""
# inp_text = """+ 1 0 - + 7 seven """

stream = lexx.tokenize(inp_text)
lxtok = LexerTokens(lexx.tokens)

stream = list(Sanitizer().whitespace(stream, keep=[]))

pars = Parser()
pars.set_input(stream)

# p_assign = pars.Production("assignment", Terminal(":=="))

p_minus = pars.Production("minus", Terminal("-"))
p_plus = pars.Production("plus", Terminal(typ=lxtok.PLUS))

p_zahl_ohne_null_5 = pars.Production(
    "zahl_ohne_null",
    Or([Terminal(str(x)) for x in range(1, 5)]),
    alias="lower_5")
p_zahl_ohne_null_10 = pars.Production(