import scanner

# EOF, comments, symbol_table, not_tokens, printTokens, printNotTokens,
# and the option* flags are module-level names defined elsewhere in this
# module.


def fileToString(fileName, options=None):
    """
    Takes a file and converts it into a string, replacing each newline
    character with a space. Skips comment lines and appends an EOF symbol
    at the end.
    """
    file_content = ""
    with open(fileName, 'r') as file:
        for line in file:
            # Drop comment lines entirely; flatten everything else.
            if scanner.isComment(line):
                continue
            file_content += line.replace('\n', ' ')
    file_content += EOF
    return file_content
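# Illustration of fileToString's contract (EOF is assumed here to be a
# one-character sentinel such as '$'; the real value is defined elsewhere
# in this module):
#
#   input file lines:  "x = 1", "# a comment", "y = 2"
#   return value:      "x = 1 y = 2 $"   (comment dropped, EOF appended)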
def parseFile(fileName):
    with open(fileName, 'r') as source_file:
        # enumerate keeps line_number correct even when a line is skipped
        # as a comment or aborts with an error.
        for line_number, line in enumerate(source_file, start=1):
            # Position of the current lexeme within the line.
            lexeme_number = 1
            if scanner.isComment(line):
                comments.append(line)
                continue
            words = parseWords(line)
            try:
                for word in words:
                    t = scanner.getToken(word)
                    if t is not None:
                        # Record every token in source order.
                        symbol_table.append(t)
                        lexeme_number += 1
                    else:
                        not_tokens.append(word)
            except ValueError as e:
                print("error in line: " + str(line_number))
                print("line content: " + line)
                print(e)
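# parseWords is called above but not defined in this section. A minimal
# sketch, assuming the source language separates lexemes with whitespace
# (the real helper may also split around punctuation and operators):
def parseWords(line):
    """Split a source line into candidate lexemes."""
    return line.split()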
def tokenize(files, options):
    for fileName in files:
        with open(fileName, 'r') as source_file:
            for line_number, line in enumerate(source_file, start=1):
                lexeme_number = 1
                if scanner.isComment(line):
                    comments.append(line)
                    continue
                words = parseWords(line)
                try:
                    for word in words:
                        t = scanner.getToken(word)
                        if t is not None:
                            # Only record the first occurrence of a lexeme.
                            if t.Word not in symbol_table:
                                symbol_table[t.Word] = t
                            lexeme_number += 1
                        else:
                            not_tokens.append(word)
                            raise TokenizerException(word, line, line_number)
                except ValueError as e:
                    print("Tokenizer error in line: " + str(line_number))
                    print("line content: " + line)
                    print(e)
                except TokenizerException as e:
                    print(e)
                    # Abort on the first bad lexeme unless the caller asked
                    # to tokenize everything regardless of errors.
                    if optionTokenizeAll not in options:
                        raise
    if optionPrintTokens in options:
        printTokens()
        printNotTokens()
    return symbol_table
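# TokenizerException is raised and handled above but not defined in this
# section. A minimal sketch, assuming it only needs to carry the offending
# word and its location for the printed error message:
class TokenizerException(Exception):
    def __init__(self, word, line, line_number):
        super().__init__("unrecognized lexeme %r on line %d: %s"
                         % (word, line_number, line.strip()))
        self.word = word
        self.line = line
        self.line_number = line_number

# Typical call, assuming optionPrintTokens and optionTokenizeAll are flag
# constants defined in this module:
#
#   table = tokenize(["program.src"], [optionPrintTokens])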