def main():
    """Drive the Jack tokenizing process.

    Usage: ``<prog> <path>`` where *path* is either a single ``.jack`` file
    or a directory tree containing ``.jack`` files.  For each source file a
    matching ``.xml`` output path is computed and a JackTokenizer is built.
    """
    file_name = sys.argv[1]
    abs_path = os.path.abspath(file_name)
    tokenizers = []
    output_files = []

    if file_name.endswith('.jack'):
        # Single-file mode.
        tokenizers.append(JackTokenizer(abs_path))
        output_files.append(os.path.splitext(abs_path)[0] + '.xml')
    else:
        # Directory mode: collect every .jack file in the tree.
        # Bug fix: the original joined file names against the walk ROOT
        # (abs_path), which produced wrong paths for files in nested
        # subdirectories; join against the directory actually being walked.
        for dirpath, _dirnames, filenames in os.walk(abs_path):
            for jack_file in filenames:
                if jack_file.endswith('.jack'):
                    src_path = os.path.join(dirpath, jack_file)
                    tokenizers.append(JackTokenizer(src_path))
                    output_files.append(
                        os.path.splitext(src_path)[0] + '.xml')

    for tokenizer in tokenizers:
        while tokenizer.has_more_tokens():
            tokenizer.advance()
            token_type = tokenizer.token_type()
            # NOTE(review): the getter results and output_files are computed
            # but never written anywhere — XML emission looks unimplemented.
            if token_type == 'KEYWORD':
                tokenizer.keyword()
            elif token_type == 'SYMBOL':
                tokenizer.symbol()
            elif token_type == 'IDENTIFIER':
                tokenizer.identifier()
            elif token_type == 'INT_CONST':
                tokenizer.int_val()
            elif token_type == 'STRING_CONST':
                tokenizer.string_val()
def test_advance(self):
    """Tests all parts of the tokenizer using this Jack code:

    /** Multi-line comment for some class. */
    class A{ // Single-line comment
        let x = -4;
        do Output.printString("Ring Constants!");
    }
    """
    tokenizer = JackTokenizer("test.jack")
    # (getter name, expected value, expected token type) per token, in order.
    expected_tokens = [
        ('keyword', CLASS, KEYWORD),
        ('identifier', 'A', IDENTIFIER),
        ('symbol', '{', SYMBOL),
        ('keyword', LET, KEYWORD),
        ('identifier', 'x', IDENTIFIER),
        ('symbol', '=', SYMBOL),
        ('symbol', '-', SYMBOL),
        ('int_val', 4, INT_CONST),
        ('symbol', ';', SYMBOL),
        ('keyword', DO, KEYWORD),
        ('identifier', 'Output', IDENTIFIER),
        ('symbol', '.', SYMBOL),
        ('identifier', 'printString', IDENTIFIER),
        ('symbol', '(', SYMBOL),
        ('string_val', 'Ring Constants!', STRING_CONST),
        ('symbol', ')', SYMBOL),
        ('symbol', ';', SYMBOL),
        ('symbol', '}', SYMBOL),
    ]
    for getter_name, value, token_type in expected_tokens:
        tokenizer.advance()
        self.assertEqual(getattr(tokenizer, getter_name)(), value)
        self.assertEqual(tokenizer.token_type(), token_type)
class CompilationEngine:
    """Recursive-descent parser for the Jack language.

    Consumes tokens from a JackTokenizer and prints an XML parse tree to
    stdout.  Only the class-level grammar rules are implemented so far;
    the statement/expression rules are stubs.
    """

    def __init__(self, filename):
        """Bind a tokenizer over the Jack source file *filename*."""
        self.tokenizer = JackTokenizer(filename)

    def compile(self, filename):
        """Run the full compilation, starting at the 'class' rule.

        *filename* is retained for backward compatibility but unused: the
        tokenizer is already bound in __init__.
        """
        # Bug fix: the original called an undefined initialize() and a bare
        # compileClass() (missing self.), both of which raise NameError.
        self.compileClass()

    def xml_print_el(self):
        """Print the current token as an XML element."""
        # Bug fix: token_type is a method (see main/tests); the original
        # passed the bound method object instead of its result.
        # NOTE(review): xmlprint is assumed defined elsewhere — confirm.
        xmlprint(self.tokenizer.token_type(), self.tokenizer.token)

    def advanceSymbol(self, symbol):
        """Advance one token and require it to be exactly *symbol*."""
        # Consistency: the tokenizer API elsewhere in this file is
        # has_more_tokens(), not hasMoreTokens().
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('Symbol expected:' + symbol + ', found end of stream')
        if self.tokenizer.symbol() != symbol:
            raise SyntaxError('Symbol expected:' + symbol)

    def advanceKeyword(self, keyword):
        """Advance one token and require it to be the keyword *keyword*."""
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('Keyword expected:' + keyword + ', found end of stream')
        if self.tokenizer.keyword() != keyword:
            raise SyntaxError('Keyword expected:' + keyword)

    def advanceTokenType(self, tokenType):
        """Advance one token and require its type to equal *tokenType*."""
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError(tokenType + ' expected, found end of stream')
        # Bug fixes: the original ignored the tokenType parameter
        # (hard-coded 'identifier') and compared the token_type *method*
        # object to a string, so the check could never pass.
        if self.tokenizer.token_type() != tokenType:
            raise SyntaxError(tokenType + ' expected')

    def advanceKeywords(self, *args):
        """Advance one token and require it to be one of the keywords *args*."""
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()
        else:
            # Bug fix: a tuple cannot be concatenated to str — format it.
            raise SyntaxError('Keywords expected:' + str(args) + ', found end of stream')
        # Bug fix: the original referenced an undefined name `keyword` and
        # used != where membership in the accepted set is intended.
        if self.tokenizer.keyword() not in args:
            raise SyntaxError('Keywords expected:' + str(args))

    def advanceAndGetType(self):
        """Advance one token, require a Jack type, and return its text."""
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('type expected, found end of stream')
        if self.is_type():
            return self.tokenizer.token
        raise SyntaxError('type expected')

    def is_type(self):
        """Return True if the current token is a Jack type (builtin or class)."""
        # Bug fix: token_type is a method; comparing the bound method to a
        # string was always False, so class-name types were never accepted.
        return (self.tokenizer.keyword() in ['int', 'char', 'boolean']
                or self.tokenizer.token_type() == 'identifier')

    def advanceAndGetReturnType(self):
        """Advance and return a subroutine return type ('void' or a type)."""
        self.advance()
        if self.is_type() or self.tokenizer.keyword() == 'void':
            return self.tokenizer.token
        raise SyntaxError('type expected')

    def advanceToClassName(self):
        """Advance to an identifier token and return it as a class name."""
        self.advanceTokenType('identifier')
        return self.tokenizer.identifier()

    def advanceToVarName(self):
        """Advance to an identifier token and return it as a variable name."""
        self.advanceTokenType('identifier')
        return self.tokenizer.identifier()

    def advanceToSubroutineName(self):
        """Advance to an identifier token and return it as a subroutine name."""
        self.advanceTokenType('identifier')
        return self.tokenizer.identifier()

    def hasClassVarDec(self):
        # TODO: lookahead for 'static'|'field' is not implemented yet.
        pass

    def advance(self):
        """Advance one token, failing loudly at end of stream."""
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('found end of stream!')

    def compileClass(self):
        """Compile: 'class' className '{' classVarDec* subroutineDec* '}'."""
        print('<class>')
        self.advanceKeyword('class')
        self.xml_print_el()
        # className (advanceToClassName already returns the identifier)
        self.advanceToClassName()
        self.xml_print_el()
        # {
        self.advanceSymbol('{')
        self.xml_print_el()
        self.advance()
        # classVarDec*
        while self.tokenizer.keyword() in ['static', 'field']:
            self.compileClassVarDec()
        # subroutineDec*
        while self.tokenizer.keyword() in ['constructor', 'function', 'method']:
            self.compileSubroutine()
        # }
        self.advanceSymbol('}')
        self.xml_print_el()
        print('</class>')

    def compileClassVarDec(self):
        """Compile: ('static'|'field') type varName (',' varName)* ';'."""
        print('<classVarDec>')
        # ('static'|'field') — current token was prefetched by the caller.
        self.xml_print_el()
        # type
        self.advanceAndGetType()
        self.xml_print_el()
        # varName
        self.advanceToVarName()
        self.xml_print_el()
        # Bug fix: the grammar allows (',' varName)* but the original only
        # accepted a single varName before the ';'.
        self.advance()
        while self.tokenizer.symbol() == ',':
            self.xml_print_el()  # ,
            self.advanceToVarName()
            self.xml_print_el()  # varName
            self.advance()
        if self.tokenizer.symbol() != ';':
            raise SyntaxError('Symbol expected:;')
        self.xml_print_el()  # ;
        print('</classVarDec>')
        # Prefetch the next token for the caller's dispatch loop.
        self.advance()

    def compileSubroutine(self):
        """Compile: ('constructor'|'function'|'method') ('void'|type)
        subroutineName '(' parameterList ')' subroutineBody."""
        print('<subroutineDec>')
        # ('constructor'|'function'|'method') — prefetched by the caller.
        self.xml_print_el()
        # ('void' | type)
        self.advanceAndGetReturnType()
        self.xml_print_el()
        # subroutineName
        self.advanceToSubroutineName()
        self.xml_print_el()
        # (
        self.advanceSymbol('(')
        self.xml_print_el()
        # parameterList (TODO: currently always emitted empty)
        self.compileParameterList()
        # )
        self.advanceSymbol(')')
        self.xml_print_el()
        # subroutineBody
        self.compileSubroutineBody()
        print('</subroutineDec>')
        # Prefetch the next token for the caller's dispatch loop.
        self.advance()

    def compileSubroutineBody(self):
        """Compile: '{' varDec* statements '}'."""
        print('<subroutineBody>')
        # {
        self.advanceSymbol('{')
        self.xml_print_el()
        # varDec*  (TODO: a loop is needed to represent the *)
        # Bug fix: the original called the nonexistent self.varDec().
        self.compileVarDec()
        # statements
        self.compileStatements()
        # }
        self.advanceSymbol('}')
        self.xml_print_el()
        print('</subroutineBody>')

    def compileParameterList(self):
        """Compile a (possibly empty) parameter list.  TODO: parameters."""
        print('<parameterList>')
        print('</parameterList>')

    def compileVarDec(self):
        # TODO: compile 'var' type varName (',' varName)* ';'
        pass

    def compileStatements(self):
        # TODO: compile statement*
        pass

    # Bug fix: the stubs below were missing the `self` parameter, so calling
    # them on an instance would raise TypeError once implemented/invoked.
    def compileDo(self):
        pass

    def compileLet(self):
        pass

    def compileWhile(self):
        pass

    def compileReturn(self):
        pass

    def compileIf(self):
        pass

    def compileExpression(self):
        # If identifier: variable, array entry, subroutine call.
        pass

    def compileTerm(self):
        # Single lookahead token — can be '[', '(' or '.'.
        pass

    def compileExpressionList(self):
        # Comma-separated list of expressions.
        pass