Example #1
def tokenize(options):
    try:
        filename = options.filename

        # Read the source file and wrap it in a character stream.
        with open(filename, encoding='utf-8') as source_file:
            code = source_file.read()
        stream = StringStream(code)

        parser = LycParser()

        if 'output' in options:
            output = options.output
            encoder = options.encoder
            filler_token_value = Tokens.WHITESPACE.value if options.binary else Tokens.WHITESPACE.name
            for token, first_index, index_after in parser.tokenize_with_intervals(
                    stream):
                if token is None:
                    bytes_ = encoder(
                        (filler_token_value, first_index, index_after))
                else:
                    token_value = token.type.value if options.binary else token.type.name
                    bytes_ = encoder((token_value, first_index, index_after))
                output.write(bytes_)
        else:
            for token in parser.tokenize(stream):
                print(str(token))

    except CompilerError as e:
        print(e.trace)
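
The `options.encoder` callable above only needs to turn a `(token_value, first_index, index_after)` triple into bytes; the project's real encoders are not shown in this example. Below is a minimal sketch of two plausible encoders (one for text output, one for `options.binary` output); the function names and formats are invented for illustration:

import json
import struct

def json_encoder(triple):
    # Text mode: one JSON array per line, e.g. ["NAME", 0, 3]
    return (json.dumps(list(triple)) + "\n").encode("utf-8")

def binary_encoder(triple):
    # Binary mode: token code and both indices as little-endian unsigned 32-bit ints
    return struct.pack("<III", *triple)

print(json_encoder(("NAME", 0, 3)))    # b'["NAME", 0, 3]\n'
print(binary_encoder((7, 0, 3)))       # 12 bytes: 7, 0, 3 packed as <III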
Example #2
    def tokenize(message: list) -> list:
        time_ = time.time()
        if not 3 <= len(message) <= 4:
            return error(
                "Tokenization request format is:\n input: ['tokenize', file_name:str, file_contents:str, binary=False]\n output: ['tokenize', token_ranges:list(list(token_code, first_index, index_after))]"
            )
        file_name = message[1]
        file_contents = message[2]
        if not isinstance(file_name, str):
            return error(
                'Tokenization request: "file_name" arg must be a string.')
        if not isinstance(file_contents, str):
            return error(
                'Tokenization request: "file_contents" arg must be a string.')
        if VERBOSE:
            print("\tfile-name: " + file_name)
            print("\tfile-contents: " +
                  (repr(file_contents) if len(file_contents) < 80 else
                   repr(file_contents[0:80]) + " ..."))
        if len(message) == 4:
            binary = message[3]
            if not isinstance(binary, bool):
                return error(
                    'Tokenization request: "binary" arg must be a boolean.')
        else:
            binary = True

        stream = StringStream(file_contents, name=file_name)

        parser = AnokyParser()

        token_ranges = []
        current_index = 0
        try:
            for token in parser.tokenize(stream, emmit_restart_tokens=True):
                token_first = token.range.first_position.index
                token_after = token.range.position_after.index
                if token_first < current_index:
                    raise Exception(
                        token_first,
                        "Overlapping tokens (%s, %s), something is wrong with the tokenizer!!!"
                        % (current_index, token_first))
                token_type = token.type.value if binary else token.type.name
                token_ranges.append([token_type, current_index, token_after])
                current_index = token_after
        except TokenizingError as e:
            return error(e)

        if len(token_ranges) > 0:
            last_token_range = token_ranges[-1]
            if last_token_range[2] < len(file_contents):
                last_token_range[2] += 1

        print("Tokenization took %s seconds" % (time.time() - time_))
        return pack(['tokenize', token_ranges])
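
The handler returns `token_ranges` as contiguous `[token_code, first_index, index_after]` triples. A minimal client-side sanity check for that shape might look like the sketch below; the helper name and the sample data are illustrative, not part of the project:

def check_token_ranges(token_ranges, text_length):
    # Ranges must be well formed, appear in order, and not overlap,
    # mirroring the "Overlapping tokens" check in the handler above.
    current = 0
    for token_code, first, after in token_ranges:
        assert 0 <= first <= after <= text_length
        assert first >= current, "overlapping tokens (%s, %s)" % (current, first)
        current = after

check_token_ranges([[3, 0, 4], [1, 4, 5], [7, 5, 10]], 10)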
Example #3
    def _get_stream(self, code_or_stream: Union[str, CharacterStream]):
        if isinstance(code_or_stream, str):
            stream = StringStream(code_or_stream)
        else:
            assert isinstance(code_or_stream, CharacterStream)
            stream = code_or_stream

        return stream
Example #4
def interactive_anoky(options):
    options.filename = '<interactive>'
    sys.path = [''] + sys.path
    (CG, init_code) = code_generator.begin(interactive=True,
                                           special_forms=__special_forms__,
                                           macros=__macros__,
                                           id_macros=__id_macros__)
    interactive_history = InMemoryHistory()
    try:
        while True:
            written_code = prompt('>>> ',
                                  history=interactive_history,
                                  multiline=True)
            stream = StringStream(written_code, '<interactive>')
            try:
                node = anoky_tokenize(stream, options)
                if not options.arrange_tokens:
                    continue
                anoky_transduce(node, options)
                if not options.expand_macros:
                    continue
                anoky_expand(node, options)
                if not options.generate_code:
                    continue
                py_ast = anoky_generate(node, options, CG)
                py_ast = code_generator.end(py_ast, CG)
                if options.print_python_ast:
                    print_ast(py_ast)
                if options.print_python_code:
                    print_python_code(py_ast)
            except CompilerError as e:
                print(e.trace)
            except Exception:
                print(
                    '\n!—›–  Compiler raised unhandled exception (this is not supposed to happen)!!!  –‹—!'
                )
                traceback.print_exc()
            else:
                ast.fix_missing_locations(py_ast)
                try:
                    compiled_ast = compile(py_ast,
                                           filename='<interactive>',
                                           mode='single')
                except Exception:
                    print('\n——›–  AST compilation failed !!!  –‹——')
                    traceback.print_exc()
                    print_ast(py_ast)
                    print_python_code(py_ast)
                else:
                    if options.execute:
                        try:
                            exec(compiled_ast)
                        except Exception:
                            traceback.print_exc()
    except EOFError:
        return
    except KeyboardInterrupt:
        return
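
The tail of the loop above is plain standard-library machinery: fix missing AST locations, compile the tree in 'single' mode, and exec the resulting code object so expression results are echoed as in a REPL. A self-contained sketch of just that step, using `ast.parse` in place of the Anoky code generator:

import ast

tree = ast.parse("6 * 7", mode="single")       # an ast.Interactive node
ast.fix_missing_locations(tree)                # harmless here; required when nodes are built or edited by hand
code_obj = compile(tree, filename="<interactive>", mode="single")
exec(code_obj)                                 # 'single' mode echoes the expression value: prints 42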
Example #5
    async def async_tokenize(id, incomming, outgoing):
        def my_send_message(msg):
            if VERBOSE: print("\treply: " + str(msg))
            return outgoing.push_message(pack(msg))

        def my_error(e):
            nonlocal outgoing
            if VERBOSE: print("\terror: " + str(e))
            return outgoing.push_message(error(e))

        # first message (see below for syntax)
        # It gives us the file name and contents of the written code,
        # whether the first offset should be treated as anything other than zero,
        # and the indentation level at which the code is written.
        message = await incomming()

        if not 3 <= len(message) <= 5:
            return outgoing.push_message(
                error(
                    "Async tokenization request format is:\n"
                    " first message: ['async_tokenize', file_name:str, file_contents:str, first_offset:int = 0, indentation_level:int = 0]\n"
                    " first reply: ['async_tokenize', handler_id:int]\n"
                    " following messages: ['async_tokenize_next', handler_id:int]\n"
                    " reply: ['async_tokenize_next', token_code, first_index, index_after]\n"
                    " ending_message: ['close', handler_id:int]\n"
                    " reply: ['close']\n"
                    "at any moment, reply may be:\n"
                    "  ['async_tokenize_error', message:str, first_position?:int, position_after?:int]"
                ))

        file_name = message[1]
        file_contents = message[2]
        if not isinstance(file_name, str):
            return my_error(
                'Async tokenization request: "file_name" arg must be a string.'
            )
        if not isinstance(file_contents, str):
            return my_error(
                'Async tokenization request: "file_contents" arg must be a string.'
            )

        if VERBOSE:
            print("\tfile-name: " + file_name)
            print("\tfile-contents: " +
                  (repr(file_contents) if len(file_contents) < 80 else
                   repr(file_contents[0:80]) + " ..."))
            if len(message) >= 4: print("\toffset: %s " % message[3])
            if len(message) >= 5: print("\tindentation: %s" % message[4])

        # Get global offset of first character, if any
        if len(message) >= 4:
            shift = message[3]
            if not isinstance(shift, int):
                return my_error(
                    'Tokenization request: "first_offset" arg must be an integer.'
                )
        else:
            shift = 0

        # get indentation level of code, if any
        if len(message) >= 5:
            indentation_level = message[4]
            if not isinstance(indentation_level, int):
                return my_error(
                    'Tokenization request: "indentation_level" arg must be an integer.'
                )
        else:
            indentation_level = 0

        # reply with the id of this async tokenization handler
        my_send_message(['async_tokenize', id])

        # Now the tokenization actually begins
        # We emit one token at a time, and between tokens we wait for the request for the next one.

        # First we prepare the stream, with the right shift and indentation level
        stream = StringStream(file_contents, name=file_name)

        if indentation_level > 0:
            stream = IndentedCharacterStream(stream)
            stream.readn(indentation_level)
            stream.push()

        # Then we tokenize the given text,
        parser = AnokyParser()
        current_index = indentation_level
        try:
            for token in parser.tokenize(stream, emmit_restart_tokens=True):
                token_first = token.range.first_position.index
                token_after = token.range.position_after.index
                if token_first < current_index:
                    raise Exception(
                        token_first,
                        "Overlapping tokens (%s, %s), something is wrong with the tokenizer!!!"
                        % (current_index + shift, token_first + shift))
                token_type = token.type.value

                # Now that we know the next token type, we wait for the next token request,
                # and emit it to the outgoing socket
                message = await incomming()
                if VERBOSE:
                    print("\tmessage: " + str(message))
                assert len(message) >= 2 and message[1] == id
                if message[0] == 'close':
                    my_send_message(['close'])
                    return
                elif message[0] == 'async_tokenize_next':
                    my_send_message([
                        'async_tokenize_next', token_type,
                        current_index + shift, token_after + shift
                    ])
                else:
                    return my_error(
                        "Unknown message for async_tokenize handler, '%s'." %
                        message[0])
                current_index = token_after
        except TokenizingError as e:
            return my_error(e)

        while True:
            message = await incomming()
            if VERBOSE: print("\tmessage: %s" % message)
            assert len(message) >= 2 and message[1] == id
            if message[0] == 'close':
                my_send_message(['close'])
                return
            elif message[0] == 'async_tokenize_next':
                my_send_message(['async_tokenize_next', -1, -1, -1])
            else:
                return my_error(
                    "Unknown message for async_tokenize handler, '%s'." %
                    message[0])

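
The message flow above, one awaited request per pushed reply, can be hard to follow when interleaved with the tokenizer. Below is a self-contained toy of just that request/reply pumping pattern; `fake_handler`, the queue-based transport, and the canned token list are invented for illustration and are not part of the project:

import asyncio

async def fake_handler(handler_id, incoming, outgoing):
    await incoming.get()                                  # ['async_tokenize', file_name, file_contents]
    outgoing.append(['async_tokenize', handler_id])       # first reply: the handler id
    for token in [('NAME', 0, 3), ('WHITESPACE', 3, 4)]:  # pretend tokenizer output
        message = await incoming.get()                    # wait for the next-token request
        if message[0] == 'close':
            outgoing.append(['close'])
            return
        outgoing.append(['async_tokenize_next', *token])
    while True:                                           # out of tokens: reply with sentinels
        message = await incoming.get()
        if message[0] == 'close':
            outgoing.append(['close'])
            return
        outgoing.append(['async_tokenize_next', -1, -1, -1])

async def main():
    incoming, outgoing = asyncio.Queue(), []
    for msg in (['async_tokenize', 'f.aky', 'foo bar'],
                ['async_tokenize_next', 1],
                ['async_tokenize_next', 1],
                ['close', 1]):
        incoming.put_nowait(msg)
    await fake_handler(1, incoming, outgoing)
    print(outgoing)

asyncio.run(main())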
Example #6
    def colorize(message: list) -> list:
        if not 3 <= len(message) <= 4:
            return error(
                "Colorization request format is:\n input: ['colorize', file_name:str, file_contents:str, binary=False]\n output: ['colorize', token_ranges:list(list(color_code, first_index, index_after))]"
            )
        file_name = message[1]
        file_contents = message[2]
        if not isinstance(file_name, str):
            return error(
                'Colorization request: "file_name" arg must be a string.')
        if not isinstance(file_contents, str):
            return error(
                'Colorization request: "file_contents" arg must be a string.')
        if VERBOSE:
            print("\tfile-name: " + file_name)
            print("\tfile-contents: " +
                  (repr(file_contents) if len(file_contents) < 80 else
                   repr(file_contents[0:80]) + " ..."))
        if len(message) == 4:
            binary = message[3]
            if not isinstance(binary, bool):
                return error(
                    'Colorization request: "binary" arg must be a boolean.')
        else:
            binary = True

        stream = StringStream(file_contents, name=file_name)

        parser = AnokyParser()
        code_expander = DefaultExpander()
        code_generator = DefaultGenerator()

        try:
            node = parser.parse(stream)
            code_expander.expand_unit(node)
            code_generator.generate_unit(node)

            colorized_tokens = []

            def extract_colorized_tokens(element):
                nonlocal colorized_tokens
                if element.color is not None and is_not_none(
                        element,
                        ".range.first_position.index") and is_not_none(
                            element, ".range.position_after.index"):
                    token_color = element.color
                    token_first = element.range.first_position.index
                    token_after = element.range.position_after.index
                    if not isinstance(token_color, int):
                        return error(
                            'Colorization request: color of token "%s" was not int!'
                            % element.text)
                    colorized_tokens.append(
                        [token_color, token_first, token_after])
                if isinstance(element.code, Node):
                    for subelement in element.code:
                        extract_colorized_tokens(subelement)

            for element in node:
                extract_colorized_tokens(element)

        except CompilerError as e:
            return error(e)

        return pack(['colorize', colorized_tokens])
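
The nested `extract_colorized_tokens` above is a recursive walk that keeps every element carrying both a color and a character range. A stand-alone sketch of the same walk, using plain dicts in place of the parser's element and Node objects (all names and data here are illustrative):

def extract(element, out):
    # Keep the element if it carries both a color and a character range.
    if element.get("color") is not None and element.get("range") is not None:
        first, after = element["range"]
        out.append([element["color"], first, after])
    # Recurse into child elements, mirroring the walk over element.code above.
    for child in element.get("children", []):
        extract(child, out)

colorized = []
extract({"color": 2, "range": (0, 3),
         "children": [{"color": 5, "range": (4, 7)}]}, colorized)
print(colorized)   # [[2, 0, 3], [5, 4, 7]]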
Example #7
    def __init__(self, filepath, encoding='utf-8'):
        with open(filepath, 'r', encoding=encoding) as content_file:
            content = content_file.read()
        StringStream.__init__(self, content, filepath)