示例#1
0
    def assign(self) -> Node:
        """
        Parse a statement that may turn out to be an assignment.

        The statement is first read through ``let()``, which returns the
        parsed node together with the optional ``let`` token. When the
        expression stopped on an assign operator, that operator is still held
        in ``self.token`` and decides what happens next:

        - ``Symbol.LASSIGN``: the node parsed so far is the target. It must be
          a name (or a let) node, otherwise a syntax error is raised. The
          right-hand side is then parsed as an expression.
        - ``Symbol.RASSIGN``: the node parsed so far is the value. A name is
          parsed as the target and the following token must be an end of
          line.
        - no assign operator: a leading ``let`` is only accepted when the
          parsed node is a class node.

        Returns:
            The resulting node, wrapped in a let node when a single-statement
            ``let`` token was returned by ``let()``.

        Raises:
            LythSyntaxError: The left member of '<-' is an expression, there
                             are trailing tokens after a '->' assignment, or a
                             let keyword leads an orphan expression.
        """
        node, let = self.let()  # The let token is optional (may be None).

        operator = self.token
        has_operator = operator is not None

        if has_operator and operator == Symbol.LASSIGN:
            if node.name not in (NodeType.Name, NodeType.Let):
                raise LythSyntaxError(node.info,
                                      msg=LythError.LEFT_MEMBER_IS_EXPRESSION)

            # Consume the operator before parsing the right-hand side.
            self.token = None
            node = Node(operator, node, self.expression())

        elif has_operator and operator == Symbol.RASSIGN:
            # Consume the operator before parsing the target name.
            self.token = None
            node = Node(operator, self.name(), node)

            if next(self.lexer) != Symbol.EOL:
                raise LythSyntaxError(node.info,
                                      msg=LythError.GARBAGE_CHARACTERS)

        elif let and node.name != NodeType.Class:
            raise LythSyntaxError(let.info, msg=LythError.LET_ON_EXPRESSION)

        if let is None:
            return node

        return Node(let, node)
示例#2
0
    def expression(self, end: Symbol = Symbol.EOL) -> Node:
        """
        Parse one expression, that is, a series of operations.

        There should be one expression per line, or one expression per pair of
        parentheses. This is why this method is not a while loop.

        After the operations have been parsed by ``addition()``, the token
        left in ``self.token`` decides the outcome:

        - an assign operator ('<-' or '->'): the node is returned as is and
          the operator is left in ``self.token`` for the assign method.
        - a colon or the ``be`` keyword following a name node: parsing is
          handed over to ``classdef()``.
        - any other token that is not the expected ``end`` symbol: trailing
          characters were found and a syntax error is raised.

        Otherwise the terminating token is consumed and the node returned.

        Args:
            end: The symbol expected to terminate the expression. It defaults
                 to the end of a line; an expression opened by a parenthesis
                 is expected to be called with the right parenthesis instead.

        Returns:
            The node of the parsed expression (or of the class definition).

        Raises:
            LythSyntaxError: The expression is followed by unexpected tokens.
        """
        node = self.addition()

        if self.token in (Symbol.LASSIGN, Symbol.RASSIGN):
            # Leave the operator in self.token, the assign method needs it.
            return node

        if node.name == NodeType.Name and self.token in (Symbol.COLON,
                                                         Keyword.BE):
            return self.classdef(node)

        if self.token is not None and self.token.symbol is not end:
            raise LythSyntaxError(node.info, msg=LythError.GARBAGE_CHARACTERS)

        # The terminating token has been checked, consume it.
        self.token = None
        return node
示例#3
0
    def classdef(self, name: Node, end: Symbol = Symbol.EOL) -> Node:
        """
        Parse a class definition whose name has already been parsed.

        The current token (or the next one from the lexer when none is
        pending) is either the ``be`` keyword, in which case a type name
        follows and is turned into a typedef node, or directly the colon that
        opens the class body. The indented block that follows the colon is
        then parsed and attached to the class node.

        Args:
            name: The node holding the name of the class.
            end: Currently unused; kept for interface compatibility.

        Returns:
            The class node built from the name, the optional type node and
            the statements of the block.

        Raises:
            LythSyntaxError: The token where a colon is expected is something
                             else.
        """
        token = self.token or next(self.lexer)

        if token == Keyword.BE:
            # 'name be type:' -- self.name() reads the type from self.token.
            self.token = next(self.lexer)
            type_node = Node.typedef(self.name())
            token = next(self.lexer)

        else:
            type_node = None

        if token != Symbol.COLON:
            # Report the location of the offending token. The type node is
            # None when there is no 'be' clause, so it cannot be relied upon.
            raise LythSyntaxError(token.info,
                                  msg=LythError.GARBAGE_CHARACTERS)

        self.token = None
        return Node.classdef(name, type_node, *self.block())
示例#4
0
    def __init__(self,
                 lexeme: str,
                 scan: Scanner,
                 force_literal=False) -> None:
        """
        Instantiate a new Token from the first scanned lexeme.

        The symbol of the token is resolved in this order: a symbol known to
        ``Symbol.as_value``, then a numeric value if the lexeme is made of
        digits, then a string if the lexeme is alphabetic or an underscore.

        The token also records the position reported by the scanner
        (filename, line number, offset and line) and starts with a quote
        count of 1 if its symbol is a quote, 0 otherwise.

        Args:
            lexeme: The text the token starts with.
            scan: The scanner providing the position of the token.
            force_literal: Stored as the ``literal`` attribute of the token.

        Raises:
            LythSyntaxError: The character being scanned could not lead to a
                             token.
        """
        resolved = Symbol.as_value(lexeme)

        if resolved is None:
            # Not a known symbol: fall back on the shape of the lexeme.
            if lexeme.isdigit():
                resolved = Literal.VALUE

            elif lexeme.isalpha() or lexeme == '_':
                resolved = Literal.STRING

            else:
                raise LythSyntaxError(scan, msg=LythError.INVALID_CHARACTER)

        self.symbol = resolved
        self.literal = force_literal
        self.info = TokenInfo(scan.filename, scan.lineno, scan.offset,
                              scan.line)
        self.lexeme = lexeme
        self.quotes = int(self.symbol is Symbol.QUOTE)
示例#5
0
    def name(self) -> Node:
        """
        Parse a name token and wrap it in a node.

        The pending token is used if there is one, otherwise a new token is
        requested from the lexer. The pending token is not cleared by this
        method.

        Raises:
            LythSyntaxError: The line or the source ends where a name is
                             expected, or the token is not a string literal.
        """
        candidate = self.token or self.lexer()

        if candidate in (Symbol.EOF, Symbol.EOL):
            raise LythSyntaxError(candidate.info,
                                  msg=LythError.INCOMPLETE_LINE)

        if candidate != Literal.STRING:
            raise LythSyntaxError(candidate.info,
                                  msg=LythError.NAME_EXPECTED)

        return Node(candidate)
示例#6
0
    def literal(self) -> Node:
        """
        Parse a literal token, either a numeral or a name, into a node.

        The pending token is used if there is one, otherwise a new token is
        requested from the lexer. In both cases the pending token is cleared,
        so that the caller does not evaluate the same literal twice.

        Special cases:
        - an opening parenthesis starts a nested expression that is expected
          to stop on the matching right parenthesis; the node of that
          expression is returned instead of a literal.
        - a doc token causes the docstring to be processed, after which the
          search for a literal resumes.

        Raises:
            LythSyntaxError: The line or the source ends where a literal is
                             expected, or the token is neither a value nor a
                             string.
        """
        current, self.token = self.token or self.lexer(), None

        if current in (Symbol.EOF, Symbol.EOL):
            raise LythSyntaxError(current.info, msg=LythError.INCOMPLETE_LINE)

        if current == Symbol.LPAREN:
            return self.expression(end=Symbol.RPAREN)

        if current == Symbol.DOC:
            self.docstring()
            return self.literal()

        if current not in (Literal.VALUE, Literal.STRING):
            raise LythSyntaxError(current.info,
                                  msg=LythError.LITERAL_EXPECTED)

        return Node(current)
示例#7
0
    def block(self) -> List[Node]:
        """
        Parse the list of indented statements following a colon.

        The expected indentation level is incremented, then statements are
        collected one by one with ``assign()`` as long as each line starts
        with an indent token of exactly that level. Empty lines are skipped.

        The block ends when:
        - the end of file is reached; the EOF token is left pending.
        - an indent token of a lower level is met; the expected level is set
          to that lower level and the indent token is left pending.

        Returns:
            The statements of the block, in source order.

        Raises:
            LythSyntaxError: A line does not start with an indent token, or
                             its indent is deeper than expected.
        """
        collected = []
        self.indent += 1

        while True:
            head = self.token or self.lexer()

            # Blank line: drop the token and look at the next one.
            if head == Symbol.EOL:
                self.token = None
                continue

            # End of source: leave EOF pending for the caller.
            if head == Symbol.EOF:
                self.token = head
                break

            if head != Symbol.INDENT:
                raise LythSyntaxError(head.info,
                                      msg=LythError.INCONSISTENT_INDENT)

            depth = head.lexeme

            # Dedent: the block is over, the indent is left pending so that
            # the enclosing block can check it against its own level.
            if depth <= self.indent - 1:
                self.indent = depth
                self.token = head
                break

            if depth != self.indent:
                raise LythSyntaxError(head.info,
                                      msg=LythError.INCONSISTENT_INDENT)

            collected.append(self.assign())

        return collected
示例#8
0
    def visit_immutableassign(self, node, context: Context) -> None:
        """
        Visit an immutable assign node and register its name.

        The left member is visited in store context to obtain the name, which
        is looked up in the symbol table for the current scope. If absent, a
        new immutable symbol of unknown type is added to the table, its value
        being the result of visiting the right member in load context.

        Raises:
            LythSyntaxError: The name is already present in the symbol table
                             for the current scope.
        """
        target = self.visit(node.left, Context.STORE)

        if self.table.get((target, self.scope), None) is not None:
            raise LythSyntaxError(node.info, msg=LythError.REASSIGN_IMMUTABLE)

        self.table += Name(
            target,
            self.scope,
            SymbolType(
                Field.UNKNOWN,
                Field.IMMUTABLE,
                self.visit(node.right, Context.LOAD),
            ),
        )
示例#9
0
    def __call__(self) -> Token:
        """
        Finalize the token and return it.

        Once the lexeme is complete it is converted to a more convenient
        type, rather than letting the analyzer do it:
        - the lexeme of a value token becomes an integer.
        - the lexeme of an indent token becomes the number of indents, that
          is, half the number of characters it is made of.

        Other tokens are returned unchanged.

        Raises:
            LythSyntaxError: The indent is made of an odd number of
                             characters.
        """
        kind = self.symbol

        if kind is Literal.VALUE:
            self.lexeme = int(self.lexeme)

        elif kind is Symbol.INDENT:
            levels, leftover = divmod(len(self.lexeme), 2)

            if leftover:
                raise LythSyntaxError(self.info, msg=LythError.UNEVEN_INDENT)

            self.lexeme = levels

        return self
示例#10
0
    def visit_name(self, node: Node,
                   context: Context) -> Union[str, int, Field]:
        """
        Visit a name node, returning either its name or its value.

        In store context, the value carried by the node (the name itself) is
        returned so that the caller can write a symbol to the symbol table.

        In any other context, the name is looked up in the symbol table for
        the current scope and the value of the type of that symbol is
        returned.

        Raises:
            LythSyntaxError: The name is looked up but is not present in the
                             symbol table for the current scope.
        """
        identifier = node.value

        if context is Context.STORE:
            return identifier

        entry = self.table.get((identifier, self.scope), None)

        if entry is not None:
            return entry.type.value

        raise LythSyntaxError(
            node.info, msg=LythError.VARIABLE_REFERENCED_BEFORE_ASSIGNMENT)
示例#11
0
    def let(self) -> Tuple[Node, Optional[Token]]:
        """
        Parse a statement that may be introduced by the let keyword.

        Three situations are handled:
        1. No let keyword: the token just read is left pending and the
           statement is parsed as an expression. No let token is returned.
        2. ``let:`` followed by an end of line: the indented block that
           follows is parsed and wrapped in a node built on the let token. No
           let token is returned, as it is already part of the node.
        3. ``let`` followed by anything else: that token is left pending, the
           statement is parsed as an expression, and the let token is
           returned along with the node for the caller to handle.

        Returns:
            A tuple made of the parsed node and the let token, if any.

        Raises:
            LythSyntaxError: ``let:`` is not immediately followed by an end
                             of line.
        """
        head = self.lexer()
        is_let = head == Keyword.LET

        #
        # 1. No let detected
        #
        if not is_let:
            self.token = head
            return self.expression(), None

        follower = self.lexer()

        #
        # 2. Multiple statements let
        #
        if follower == Symbol.COLON:
            terminator = self.lexer()

            if terminator != Symbol.EOL:
                raise LythSyntaxError(terminator.info,
                                      msg=LythError.GARBAGE_CHARACTERS)

            return Node(head, *self.block()), None

        #
        # 3. Single statement let
        #
        self.token = follower
        return self.expression(), head
示例#12
0
    def __add__(self, lexeme: str) -> Token:
        """
        Append a scanned character to this token.

        The character is only accepted if it keeps the integrity of the
        token. The symbol of the token may change along the way: for example
        '<' followed by '-' becomes a different symbol.

        Rules are applied in this order, the first one matching wins:
        0. In literal mode, a double quote turns the token into a quote and
           increments its quote count; any other character is appended and
           the token becomes a string.
        1. A space appended to an indent extends the indent.
        2. If the current lexeme followed by the character is a known symbol,
           the token becomes that symbol.
        3. A symbol character right after a literal is an error (missing
           space before operator). The lexer may recover from it.
        4. A digit appended to a literal extends the lexeme.
        5. An alphanumerical character or '_' appended to a string extends
           the lexeme; if the lexeme is then a keyword, the token becomes
           that keyword.
        6. An alphanumerical character or '_' appended to a keyword extends
           the lexeme and demotes the token back to a string.
        7. An alphanumerical character or '_' right after a symbol is an
           error (missing space after operator). The lexer may recover from
           it.
        8. A double quote appended to a quote increments the quote count.
        9. Anything else is a syntax error.

        Returns:
            This token, updated in place.

        Raises:
            LythSyntaxError: The character cannot be appended to the token.
        """
        # 0. Literal mode.
        if self.literal:
            if lexeme != '"':
                self.symbol = Literal.STRING
                self.lexeme += lexeme
                return self

            self.symbol = Symbol.QUOTE
            self.quotes += 1
            return self

        # 1. Growing indent.
        if lexeme == ' ' and self.symbol is Symbol.INDENT:
            self.lexeme += lexeme
            return self

        # 2. The extended lexeme is itself a symbol.
        merged = Symbol.as_value(self.lexeme + lexeme)

        if merged is not None:
            self.symbol = merged
            self.lexeme += lexeme
            return self

        standalone = Symbol.as_value(lexeme)
        word_char = lexeme.isalnum() or lexeme == '_'

        # 3. Symbol glued to the end of a literal.
        if standalone is not None and self.symbol in Literal:
            raise LythSyntaxError(self.info,
                                  msg=LythError.MISSING_SPACE_BEFORE_OPERATOR)

        # 4. One more digit in a literal.
        if lexeme.isdigit() and self.symbol in Literal:
            self.lexeme += lexeme
            return self

        # 5. One more character in a string, possibly completing a keyword.
        if word_char and self.symbol is Literal.STRING:
            self.lexeme += lexeme
            self.symbol = Keyword.as_value(self.lexeme) or self.symbol
            return self

        # 6. A keyword that keeps growing is just a string.
        if word_char and self.symbol in Keyword:
            self.lexeme += lexeme
            self.symbol = Literal.STRING
            return self

        # 7. Literal glued to the end of a symbol.
        if word_char and self.symbol in Symbol:
            raise LythSyntaxError(self.info,
                                  msg=LythError.MISSING_SPACE_AFTER_OPERATOR)

        # 8. One more quote, counting them is up to the lexer.
        if lexeme == '"' and self.symbol is Symbol.QUOTE:
            self.quotes += 1
            return self

        # 9. Nothing matched.
        raise LythSyntaxError(self.info, msg=LythError.SYNTAX_ERROR)
示例#13
0
    def next(self) -> Token:
        """
        Generate the tokens of the source being scanned, one at a time.

        This method is a generator: it reads characters from the scanner and
        yields tokens. It uses spaces as delimiters. Spaces in python comprise
        escape characters (line feed etc.) as well. Tokens are yielded upon a
        space and successive spaces are ignored.

        There are multiple cases to consider here.
        1. A space is detected and a token is being built. The generator yields
           the finalized token, effectively stopping its construction.
        2. A space is detected and an indent token is being built. The
           generator appends the space to this indent.
        3. If the space is a line feed character, the generator yields a new
           token built from the line feed right after.
        4. If the scanner offset is 0 after a space, this is the beginning of
           an indent and a corresponding token is instantiated.
        5. Other spaces following are ignored, looping through the while loop
           to retrieve another character (and so on).
        6. If it is not a space and it ends an indent, the generator yields
           first the finalized indent.
        7. If it is not a space and no token is present, then we start creating
           one. A colon in this position is an error.
        8. If a colon is following directly another token, we stop building the
           token, yield it, and start a colon token.
        9. If it is not a space and a token is present, then we continue the
           construction of the current token.
        10. When a quote token reaches three quotes, a token built from three
            quotes is yielded and the in-doc flag handed to subsequent tokens
            is toggled.

        When the scanner raises StopIteration:
        1. If a token is still being built, and it is not an end of line token
           with a line number of 0, then the generator pedantically asks for
           an empty line by raising an exception.
        2. Otherwise the generator yields a token built from None and leaves
           the while loop, causing the generator to raise StopIteration on
           future next() calls.

        Exceptions can be ignored:
        1. Tokens pedantically require that symbols are separated with
           spaces. However '+5', '-5' and '5!' are examples of valid
           expressions. For a selected set of symbols, the generator yields
           the current token and starts a new one instead of failing.

        Raises:
            LythSyntaxError: A colon starts a token, the source does not end
                             as expected, or a token could not be extended and
                             the error is not one of the tolerated cases.
        """
        token = None
        in_doc = False

        while True:
            try:
                char = self.scanner()

                if char.isspace():
                    #
                    # 1. A space is detected, and a token is being built.
                    #
                    if token is not None and token != Symbol.INDENT:
                        yield token()

                    #
                    # 2. A space is detected, and an indent token is being
                    #    built.
                    #
                    elif token is not None and token == Symbol.INDENT:
                        token += ' '
                        continue

                    #
                    # 3. If the space is a line feed character, the generator
                    #    inserts a new token built from it.
                    #
                    if char == '\n':
                        yield Token('\n', self.scanner, in_doc)

                    token = None

                    #
                    # 4. If the scanner offset is 0, this is the beginning of
                    #    an indent.
                    #
                    if self.scanner.offset == 0:
                        token = Token(' ', self.scanner, in_doc)
                        continue

                    #
                    # 5. Other spaces following are ignored.
                    #
                    continue

                #
                # 6. If it is not a space and it ends an indent, the generator
                #    yields the finalized indent first.
                #
                if token is not None and token == Symbol.INDENT:
                    yield token()
                    token = None

                #
                # 7. If it is not a space and no token is present, then we
                #    start defining a new token. A colon cannot start a token:
                #    it must directly follow another one (see case 8).
                #
                if token is None:
                    token = Token(char, self.scanner, in_doc)
                    if token == Symbol.COLON:
                        raise LythSyntaxError(
                            token.info, msg=LythError.TOO_MUCH_SPACE_BEFORE)

                #
                # 8. A colon token is following directly another token.
                #    NOTE(review): unlike every other yield in this method,
                #    the token is yielded here without being finalized through
                #    token() -- confirm whether this is intended.
                #
                elif token is not None and char == ':':
                    yield token
                    token = Token(char, self.scanner, in_doc)

                #
                # 9. If it is not a space and a token is present, then we
                #    append the character to the token.
                #
                else:
                    token += char

                    #
                    # 10. Three quotes in a row: yield a token built from
                    #     three quotes and toggle the in-doc flag passed to
                    #     the tokens created afterwards.
                    #
                    if token == Symbol.QUOTE and token.quotes == 3:
                        yield Token('"""', self.scanner, in_doc)()
                        in_doc = not in_doc
                        token = None

            except StopIteration:
                # Presumably raised by the scanner once the source is
                # exhausted -- verify against the scanner implementation.
                # NOTE(review): the token initializer visible in this project
                # stores the line number under token.info, not token.lineno;
                # confirm that Token exposes a lineno attribute.
                if token is not None and (token.symbol is not Symbol.EOL
                                          or token.lineno != 0):
                    raise LythSyntaxError(
                        token.info, msg=LythError.MISSING_EMPTY_LINE) from None

                # Last token of the stream, built from None.
                yield Token(None, self.scanner, in_doc)
                break

            except LythSyntaxError as error:
                # Some "missing space" errors are tolerated: the current token
                # is yielded and a new one is started with the character that
                # could not be appended.
                if error.msg is LythError.MISSING_SPACE_AFTER_OPERATOR:
                    # Tolerated after '+', '-' and '(' only.
                    if token is not None and token.symbol in (Symbol.ADD,
                                                              Symbol.SUB,
                                                              Symbol.LPAREN):
                        yield token()
                        token = Token(char, self.scanner, in_doc)
                        continue

                elif error.msg is LythError.MISSING_SPACE_BEFORE_OPERATOR:
                    # Tolerated when the new character is ')' or when '('
                    # directly follows a string token ('and' binds tighter
                    # than 'or' in the condition below).
                    new_token = Token(char, self.scanner, in_doc)
                    if new_token.symbol is Symbol.RPAREN \
                       or token.symbol is Literal.STRING and new_token.symbol is Symbol.LPAREN:
                        yield token()
                        token = new_token
                        continue

                # Any other error is propagated unchanged.
                raise