示例#1
0
class Parser(object):
    """Parser for VM intermediate-language commands.

    Tokens are pulled from a ``Lexer``; the most recently parsed command is
    exposed through the ``command_type``, ``arg1`` and ``arg2`` properties.
    """

    def __init__(self, filename):
        """Create the lexer for *filename*, run it, and build the dispatch table.

        Args:
            filename: Passed unchanged to ``Lexer``.
        """
        self._lexer = Lexer(filename)   # Lexical analyzer instance
        self._lexer.analyze()           # Let the lexer do its thing
        self._command_type = None       # The type of command we're parsing now
        self._arg1 = None               # The first arg of the current command
        self._arg2 = None               # The second arg of the current command
        self._token = None              # Token of the most recently read input
        self._lexeme = None             # Lexeme of the most recently read input

        # VM IL command dispatch table, keyed by the command's lexeme.
        # Commands can have an arity of 0, 1, or 2.
        self._commands = {}
        for handler, names in (
                (self._nullary, ('add', 'sub', 'neg', 'eq', 'gt', 'lt',
                                 'and', 'or', 'not', 'return')),
                (self._unary, ('label', 'goto', 'if-goto')),
                (self._binary, ('push', 'pop', 'function', 'call'))):
            self._commands.update(dict.fromkeys(names, handler))

    def _get_next_token(self):
        """Populates the current token and lexeme with the next from the lexer
        """
        self._token, self._lexeme = self._lexer.get_next_token()

    def _peek_next_token(self):
        """Returns the next token without removing it from the input
        """
        return self._lexer.peek_next_token()

    def has_more_commands(self):
        """Returns True if there are more tokens in the input
        """
        return self._lexer.has_more_tokens()

    def _parse_command(self, func):
        """Simple helper method for the command dispatch table; calls *func*
        """
        func()

    def _nullary(self):
        """Handles a command with no arguments.

        The command type is the current token. Only when that token is
        VmToken.ARITHMETIC is the lexeme (the operator) stored as arg1;
        otherwise arg1 is left untouched.
        """
        self._command_type = self._token
        if self._command_type == VmToken.ARITHMETIC:
            self._arg1 = self._lexeme

    def _unary(self):
        """Handles a one-argument command: the next lexeme becomes arg1
        """
        # Record the type before reading on; _get_next_token overwrites _token.
        self._command_type = self._token
        self._get_next_token()
        self._arg1 = self._lexeme

    def _binary(self):
        """Handles a two-argument command: the next two lexemes become
        arg1 and arg2
        """
        self._unary()
        self._get_next_token()
        self._arg2 = self._lexeme

    def advance(self):
        """Gets the next command

        Reads one token, looks its lexeme up in the dispatch table and lets
        the matching handler consume the command's arguments.

        This method does not raise for ordinary errors: any Exception is
        caught, its message is printed, the command type becomes
        VmToken.ERROR, and both arguments are cleared so that a command
        which failed half-way does not expose partial operands.
        """
        self._arg1 = None
        self._arg2 = None
        try:
            self._get_next_token()
            handler = self._commands.get(self._lexeme)
            if handler is None:
                # An explicit message instead of a bare KeyError, whose
                # str() is just the quoted key.
                raise Exception(
                    "Invalid input '{}'; expected a VM command.".format(
                        self._lexeme))
            self._parse_command(handler)
        except Exception as ex:
            self._command_type = VmToken.ERROR
            self._arg1 = None
            self._arg2 = None
            print(str(ex))

    @property
    def command_type(self):
        """Returns the current command type as a VmToken
        """
        return self._command_type

    @property
    def arg1(self):
        """Returns the first argument of the current command, or the operator
           in the case of arithmetic commands
        """
        return self._arg1

    @property
    def arg2(self):
        """Returns the second argument of the current command
        """
        return self._arg2
示例#2
0
class Parser(object):
    """Parser for Hack assembly commands (a-, l- and c-commands).

    Tokens are pulled from a ``Lexer``; the most recently parsed command is
    exposed through the ``command_type``, ``symbol``, ``dest``, ``comp`` and
    ``jump`` properties.
    """

    def __init__(self, filename):
        """Create the lexer for *filename* and run it.

        Args:
            filename: Passed unchanged to ``Lexer``.
        """
        self._lexer = Lexer(filename)  # Lexical analyzer instance
        self._lexer.analyze()  # Let the lexer do its thing
        self._command_type = None  # The type of command we're parsing now
        self._symbol = None  # The current a- or l-command symbol
        self._dest = None  # The current c-command dest field
        self._comp = None  # The current c-command comp field
        self._jump = None  # The current c-command jump field

    def _consume_token(self):
        """Removes the next (token, lexeme) pair from the lexer and returns it
        """
        return self._lexer.get_next_token()

    @property
    def _next_token(self):
        """Alias of _consume_token(), kept for backward compatibility.

        NOTE: merely reading this property consumes a token.
        """
        return self._consume_token()

    def _peek_next_token(self):
        """Returns the next (token, lexeme) pair without consuming it
        """
        return self._lexer.peek_next_token()

    def _reset_fields(self):
        """Clears the per-command fields so nothing leaks between commands
        """
        self._symbol = None
        self._dest = None
        self._comp = None
        self._jump = None

    def _parse_a_command(self):
        """Parses commands of the form @symbol or @number

        Raises:
            Exception: if the token after '@' is neither an identifier nor
                a number.
        """
        token, lexeme = self._consume_token()
        if token not in (HackToken.IDENTIFIER, HackToken.NUMBER):
            raise Exception(
                "Invalid input '{}'; expected identifier or number.".format(
                    lexeme))
        self._command_type = CommandType.A_COMMAND
        self._symbol = lexeme

    def _parse_l_command(self):
        """Parses label commands of the form (symbol)

        Raises:
            Exception: if the label is not an identifier, or is not followed
                by ')'.
        """
        token, lexeme = self._consume_token()
        if token != HackToken.IDENTIFIER:
            raise Exception(
                "Invalid input '{}'; expected identifier.".format(lexeme))
        self._command_type = CommandType.L_COMMAND
        self._symbol = lexeme
        # Consume the closing token; its value is only used for error checking
        token, lexeme = self._consume_token()
        if token != HackToken.OP_RPAREN:
            raise Exception(
                "Invalid input '{}'; expected ')'.".format(lexeme))

    def _parse_dest(self, token, lexeme):
        """Sets the dest part of the c-command, if there is one

        Args:
            token: Token of the first input item of the command.
            lexeme: Lexeme of the first input item of the command.

        Returns:
            The (token, lexeme) pair at which the comp field starts: the
            item following '=' when a dest is present, otherwise the item
            that was passed in.
        """
        upcoming, _ = self._peek_next_token()
        if upcoming == HackToken.OP_ASSIGN:
            # This token is the dest; consume the '=' and return the next token
            self._consume_token()
            self._dest = lexeme
            return self._consume_token()
        # This is not the dest; return it back to the caller
        self._dest = None
        return Token(token, lexeme)

    def _parse_comp(self, token, lexeme):
        """Sets the comp part of the c-command; this is required

        Accepts a unary '!'/'-' followed by one operand, or a number or
        identifier optionally followed by a binary operator and a second
        operand. The lexemes are concatenated into a single comp string.

        Raises:
            Exception: if *token* cannot start a comp expression.
        """
        self._comp = lexeme
        if token in (HackToken.OP_NOT, HackToken.OP_MINUS):
            # Unary not or negation
            _, operand = self._consume_token()
            self._comp += operand  # concatenate the two lexemes
        elif token in (HackToken.NUMBER, HackToken.IDENTIFIER):
            upcoming, _ = self._peek_next_token()
            if upcoming in (HackToken.OP_AND, HackToken.OP_OR,
                            HackToken.OP_PLUS, HackToken.OP_MINUS):
                # We've got a binary operator; use it and get the other operand
                _, operator = self._consume_token()
                _, operand = self._consume_token()
                self._comp += (operator + operand)
        else:
            raise Exception("Invalid input '{}'.".format(lexeme))

    def _parse_jump(self):
        """Sets the jump part of the c-command, if it exists
        """
        upcoming, _ = self._peek_next_token()
        if upcoming == HackToken.OP_SEMICOLON:
            # Consume the semicolon; next token should be the jump value
            # TODO: Add error checking here -- the lexeme is stored unvalidated
            self._consume_token()
            _, mnemonic = self._consume_token()
            self._jump = mnemonic
        else:
            # No jump
            self._jump = None

    def _parse_c_command(self, token, lexeme):
        """Parses commands of the following forms:
            dest=comp;jump
            dest=comp
            comp;jump
            comp
        """
        self._command_type = CommandType.C_COMMAND
        comp_tok, comp_val = self._parse_dest(token, lexeme)
        self._parse_comp(comp_tok, comp_val)
        self._parse_jump()

    def has_more_commands(self):
        """Returns the lexer's has_more_tokens() result
        """
        return self._lexer.has_more_tokens()

    def advance(self):
        """Parses the next command from the input.

        All per-command fields are cleared first, so the properties only
        describe the command just parsed and never one parsed earlier.
        On EOF the command type is None.

        This method does not raise for ordinary errors: any Exception is
        caught, its message is printed, the command type becomes
        CommandType.ERROR, and the fields are cleared again so a partially
        parsed command is not exposed.
        """
        self._reset_fields()
        try:
            token, lexeme = self._consume_token()
            if token == HackToken.OP_ADDR:
                self._parse_a_command()
            elif token == HackToken.OP_LPAREN:
                self._parse_l_command()
            elif token == HackToken.EOF:
                self._command_type = None
            else:
                self._parse_c_command(token, lexeme)
        except Exception as ex:
            self._command_type = CommandType.ERROR
            self._reset_fields()
            print(str(ex))

    @property
    def command_type(self):
        """Returns the type of the current command (None after EOF)
        """
        return self._command_type

    @property
    def symbol(self):
        """Returns the symbol of the current a- or l-command, else None
        """
        return self._symbol

    @property
    def dest(self):
        """Returns the dest field, or the string "null" when there is none
        """
        return self._dest or "null"

    @property
    def comp(self):
        """Returns the comp field of the current c-command, else None
        """
        return self._comp

    @property
    def jump(self):
        """Returns the jump field, or the string "null" when there is none
        """
        return self._jump or "null"