示例#1
0
    def make_lexing_code(self):
        from cflexer.codebuilder import Codebuilder
        result = Codebuilder()
        result.start_block("def recognize(runner, i):")
        result.emit("#auto-generated code, don't edit")
        result.emit("assert i >= 0")
        result.emit("input = runner.text")
        result.emit("state = 0")
        result.start_block("while 1:")
        state_to_chars = {}
        for (state, char), nextstate in self.transitions.iteritems():
            state_to_chars.setdefault(state, {}).setdefault(nextstate,
                                                            set()).add(char)
        state_to_chars_sorted = state_to_chars.items()
        state_to_chars_sorted.sort()
        above = set()
        for state, nextstates in state_to_chars_sorted:
            above.add(state)
            with result.block("if state == %s:" % (state, )):
                if state in self.final_states:
                    result.emit("runner.last_matched_index = i - 1")
                    result.emit("runner.last_matched_state = state")
                with result.block("try:"):
                    result.emit("char = input[i]")
                    result.emit("i += 1")
                with result.block("except IndexError:"):
                    result.emit("runner.state = %s" % (state, ))
                    result.emit("runner.reachedend = True")
                    if state in self.final_states:
                        result.emit("return i")
                    else:
                        result.emit("return ~i")
                elif_prefix = ""
                for nextstate, chars in nextstates.iteritems():
                    final = nextstate in self.final_states
                    compressed = compress_char_set(chars)
                    if nextstate in above:
                        continue_prefix = "continue"
                    else:
                        continue_prefix = ""
                    for i, (a, num) in enumerate(compressed):
                        if num < 3:
                            for charord in range(ord(a), ord(a) + num):
                                with result.block("%sif char == %r:" %
                                                  (elif_prefix, chr(charord))):
                                    result.emit("state = %s" % (nextstate, ))
                                    result.emit(continue_prefix)
                                if not elif_prefix:
                                    elif_prefix = "el"
                        else:
                            with result.block(
                                    "%sif %r <= char <= %r:" %
                                (elif_prefix, a, chr(ord(a) + num - 1))):
                                result.emit("state = %s" % (nextstate, ))
                                result.emit(continue_prefix)
                            if not elif_prefix:
                                elif_prefix = "el"
                with result.block("else:"):
                    result.emit("break")
        for state in range(self.num_states):
            if state in state_to_chars:
                continue
            assert state in self.final_states
        result.emit("""
runner.last_matched_state = state
runner.last_matched_index = i - 1
runner.state = state
if i == len(input):
    return i
else:
    return ~i
break""")
        result.end_block("while")
        result.emit("""
runner.state = state
return ~i""")
        result.end_block("def")
        result = result.get_code()
        while "\n\n" in result:
            result = result.replace("\n\n", "\n")
        exec py.code.Source(result).compile()
        return recognize
示例#2
0
    def make_code(self):
        """Generate, compile and return a ``recognize(input)`` function.

        The automaton's transition table (``self.transitions``, keyed by
        ``(state, char)``) is unrolled into Python source: one
        ``if state == N:`` block per state inside a single ``while 1:``
        loop, starting from state 0.

        The generated function returns ``True`` when the input is exhausted
        in a final state.  In every other case it leaves the loop and
        raises ``LexerError(input, state, i)``.

        Returns:
            The compiled ``recognize`` function.
        """
        from rpython.rlib.parsing.codebuilder import Codebuilder
        result = Codebuilder()
        result.start_block("def recognize(input):")
        result.emit("i = 0")
        result.emit("state = 0")
        result.start_block("while 1:")

        # Regroup the flat transition table as
        #   state -> {nextstate: set of chars leading to nextstate}
        #   Ex: state_to_chars = {0: {1: set(['a', 'b']), 2: set(['c'])}, ...}
        state_to_chars = {}
        for (state, char), nextstate in self.transitions.iteritems():
            state_to_chars.setdefault(state, {}).setdefault(nextstate,
                                                            set()).add(char)

        # States whose block has already been emitted.  Jumping to one of
        # them needs an explicit "continue" to restart the while loop;
        # jumping to a later state simply falls through to its block below.
        above = set()
        for state, nextstates in state_to_chars.iteritems():
            above.add(state)
            with result.block("if state == %s:" % (state, )):
                with result.block("if i < len(input):"):
                    result.emit("char = input[i]")
                    result.emit("i += 1")
                with result.block("else:"):
                    if state in self.final_states:
                        result.emit("return True")
                    else:
                        result.emit("break")
                # "" for the first condition ("if"), "el" afterwards ("elif").
                elif_prefix = ""
                for nextstate, chars in nextstates.iteritems():
                    # An empty string emits a blank line, which is squeezed
                    # out again when blank lines are collapsed below.
                    if nextstate in above:
                        continue_prefix = "continue"
                    else:
                        continue_prefix = ""
                    # compress_char_set yields (first_char, run_length)
                    # pairs, judging by how they are consumed here.
                    for a, num in compress_char_set(chars):
                        if num < 5:
                            # Short runs: one equality test per character.
                            conditions = [
                                "char == %r" % (chr(charord), )
                                for charord in range(ord(a), ord(a) + num)]
                        else:
                            # Longer runs: a single range comparison.
                            conditions = [
                                "%r <= char <= %r" %
                                (a, chr(ord(a) + num - 1))]
                        for condition in conditions:
                            with result.block("%sif %s:" %
                                              (elif_prefix, condition)):
                                result.emit("state = %s" % (nextstate, ))
                                result.emit(continue_prefix)
                            elif_prefix = "el"
                with result.block("else:"):
                    result.emit("break")

        # States without outgoing transitions.  Only a final one may accept
        # at end of input; a non-final dead state must always reject, just
        # like non-final states are rejected at end of input above.
        for state in range(self.num_states):
            if state in state_to_chars:
                continue
            with result.block("if state == %s:" % (state, )):
                if state in self.final_states:
                    with result.block("if i == len(input):"):
                        result.emit("return True")
                    with result.block("else:"):
                        result.emit("break")
                else:
                    result.emit("break")
        result.emit("break")
        result.end_block("while")
        result.emit("raise LexerError(input, state, i)")
        result.end_block("def")
        code = result.get_code()
        while "\n\n" in code:
            code = code.replace("\n\n", "\n")
        # The generated code refers to LexerError, so it must be present in
        # the namespace the source is executed in.
        namespace = {'LexerError': LexerError}
        exec(py.code.Source(code).compile(), namespace)
        return namespace['recognize']