def advance(self):
    """Read the next token from the input stream and advance the
    current/next token pair; sets has_more_tokens to False once the
    current token is empty.

    NOTE(review): an identical ``advance`` definition appears later in
    this file (English-commented copy). If both live in the same class,
    the later definition shadows this one — confirm and remove one.
    """
    # initialize char
    char = self.input_file.read(1)
    # strip whitespace and comments
    char = self._skip_whitespace_and_comments(starting_char=char)
    # build the token
    if self._is_string_const_delimeter(char):
        token = JackToken(self._get_string_const(starting_char=char))
    elif char.isalnum():
        token = JackToken(self._get_alnum_underscore(starting_char=char))
    else:
        # symbol
        token = JackToken(char)
    # install the token
    if self.current_token:
        self.current_token = self.next_token
        self.next_token = token
        self.tokens_found.append(token)
    else:
        # initial setup: seed both slots, then fetch one more token so
        # next_token stays one ahead of current_token
        self.current_token = token
        self.next_token = token
        self.tokens_found.append(token)
        # fetch the following token
        self.advance()
    if self.current_token.is_empty():
        self.has_more_tokens = False
def advance(self):
    """Pull the next token off the input stream and rotate the
    current/next lookahead pair; flips has_more_tokens off once the
    current token is empty.
    """
    # read one character, skipping over whitespace and comments
    first_char = self._skip_whitespace_and_comments(
        starting_char=self.input_file.read(1))

    # classify the character and consume the full lexeme
    if self._is_string_const_delimeter(first_char):
        new_token = JackToken(self._get_string_const(starting_char=first_char))
    elif first_char.isalnum():
        new_token = JackToken(self._get_alnum_underscore(starting_char=first_char))
    else:
        new_token = JackToken(first_char)  # single-character symbol

    if not self.current_token:
        # First call: seed both slots with the same token, then recurse
        # once so next_token is always one token ahead of current_token.
        self.current_token = new_token
        self.next_token = new_token
        self.tokens_found.append(new_token)
        self.advance()
    else:
        # Steady state: shift the lookahead forward.
        self.current_token, self.next_token = self.next_token, new_token
        self.tokens_found.append(new_token)

    if self.current_token.is_empty():
        self.has_more_tokens = False
def _setToken(self, tokenString):
    """Wrap tokenString in a JackToken as the current token and, when
    debug mode is on, echo it to the output file as an XML element
    tagged with the token's type.
    """
    self.currentToken = JackToken(tokenString)
    if not self.debugMode:
        return
    tag = self.currentToken.type
    body = _encodeXmlToken(self.currentToken.token)
    # NOTE(review): 'ouputFileObject' looks misspelled, but it is an
    # attribute set elsewhere — keep the existing spelling to match.
    self.ouputFileObject.write(f"<{tag}> {body} </{tag}>\n")
def get_tokens(self, code):
    """Parse the individual tokens from one line of Jack code.

    Args:
        code: a line of Jack source as a string.

    Returns:
        A list of JackToken objects, each tagged with one of the types
        'string', 'symbol', 'keyword', 'integer', or 'identifier'.

    Side effects:
        When the line contains at least one identifier, the full token
        list is handed to self.scope_tracker.handle_identifier so the
        symbol-table scope can be updated.
    """
    token_list = []  # list of JackToken objects
    identifier_in_tokens = False  # set when the line contains identifiers

    def check_for_int(token):
        # helper: True when the token parses as an integer literal
        try:
            int(token)
            return True
        except ValueError:
            return False

    # detect quoted string constants so they are kept intact
    strings = re.findall(r'["\']([^"\']+)["\']', code)
    # remove whitespace that is not within a string constant
    # (linear nested comprehension instead of quadratic sum(..., []))
    sub_tokens = [
        piece
        for t in re.split(r'["\']', code)
        for piece in ([t] if t in strings else t.split())
    ]
    # split on symbols (keeping the symbols via the capturing group)
    # and drop empty fragments
    tokens = [
        piece
        for t in sub_tokens
        for piece in ([t] if t in strings else re.split(
            fr'({self.symbol_regex})', t))
        if piece
    ]

    # classify each token (token_type avoids shadowing builtin 'type')
    for token in tokens:
        if token in strings:
            token_type = 'string'
        elif token in jack_symbols_list:
            token_type = 'symbol'
        elif token in jack_keywords_list:
            token_type = 'keyword'
        elif check_for_int(token):
            token_type = 'integer'
        else:
            token_type = 'identifier'
            identifier_in_tokens = True
        token_list.append(JackToken(token, token_type))

    if identifier_in_tokens:
        # let the scope tracker see the whole line of tokens
        self.scope_tracker.handle_identifier(token_list)
    return token_list
def compile_term(self, token): """ compile a term within an expression """ # define symbols that indicate end of term returnable_symbols = [ ')', ']', ',', ';', '+', '-', '*', '/', '&', '|', '<', '>', '~', '=' ] # keep track of what's been compiled so far previous_token = JackToken(None, None) self.output('<term>') # open term # itereate through token within term while True: # if token indicated end of term if token.value in returnable_symbols: # detect whether it is unary or not if not previous_token.value: # token is unary operator, write before returning self.write_token(token) token = self.compile_term(self.get_next_token()) break else: # token is binary operator (not necessarily bitwise) # return token to caller to write in expression as # term seperator break elif token.value == '(': # either a subroutine was called or there is some # parenthetical math operations going one if previous_token.type == "identifier": # if an identifier preceeds the token, a subroutine # was called, in which case an expression list follows self.write_token(token) token = self.compile_expression_list() self.write_token(token) else: # otherwise, parenthesis are present to define order # of operations within maths, which means it is enclosing # an expression rather than expression list self.write_token(token) token = self.compile_expression() self.write_token(token) elif previous_token.type == "identifier" and token.value == '[': # if term is composed of an array access, compile expression # within indexing brackets, as index can itself be an # expression self.write_token(token) token = self.compile_expression() self.write_token(token) else: # otherwise, output token as nromal self.write_token(token) # keep track of last compiled token previous_token = token token = self.get_next_token() # continue with next token self.output('</term>') # close term return token # caller is expecting a token back