def line_tokenize(self, non_literal, line_num):
    """
    Tokenizes a line from which the string literals have been removed.

    :param non_literal: the text to be tokenized, containing no string literals
    :param line_num: the line number
    :return: None
    """
    lst = normalize(non_literal)
    for part in lst:
        if part.isidentifier():
            self.tokens.append(stl.IdToken(line_num, part))
        elif is_float(part):
            self.tokens.append(stl.NumToken(line_num, part))
        elif is_integer(part):
            self.tokens.append(stl.NumToken(line_num, part))
        elif stl.is_in_all(part):
            self.tokens.append(stl.IdToken(line_num, part))
        elif part[:-1] in stl.OP_EQ:
            self.tokens.append(stl.IdToken(line_num, part))
        elif part == stl.EOL:
            self.tokens.append(stl.IdToken(line_num, stl.EOL))
        elif part in stl.OMITS:
            pass  # skip ignorable symbols
        else:
            raise stl.ParseException(
                "Unknown symbol: '{}', at line {}".format(part, line_num))
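# A hedged illustration (not executed): assuming `normalize` splits a raw
# line into parts and that stl.EOL is the statement terminator ";", a line
# such as `a += 2;` would normalize to ["a", "+=", "2", ";"] and be
# classified as IdToken("a"), IdToken("+=") via `part[:-1] in stl.OP_EQ`,
# NumToken("2"), and IdToken(";").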
def import_file(self, full_path, import_name):
    """
    Imports an external sp file.

    This method tokenizes the imported file and inserts all of its tokens,
    except the EOF token, into the token list of the current file.

    :param full_path: the path of the file to be imported
    :param import_name: the name under which the import is registered
    """
    with open(full_path, "r") as file:
        lexer = Tokenizer()
        lexer.setup(self.spl_path, full_path, get_dir(full_path),
                    import_lang=False)
        lexer.tokenize(file)

    self.tokens.append(stl.IdToken(LINE_FILE, import_name))
    self.tokens.append(stl.IdToken(LINE_FILE, full_path))
    self.tokens.append(stl.IdToken(LINE_FILE, "{"))
    self.tokens += lexer.tokens
    self.tokens.pop()  # remove the EOF token of the imported file
    self.tokens.append(stl.IdToken(LINE_FILE, "}"))
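# Hedged sketch of the resulting token stream (values only, line info
# omitted): the "import" IdToken left in place by find_import, followed by
# the tokens appended here, yields
#     import <import_name> <full_path> { ...tokens of the imported file... }
# so the parser sees the imported file's body as a named, braced block.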
def restore_tokens(self, file: _io.BytesIO):
    """
    Restores the token list from a binary stream of serialized tokens.

    Each record begins with a one-byte type flag: 0 marks the end of the
    stream, while 1 to 4 mark number, literal, identifier and doc tokens
    respectively.

    :param file: the binary stream to read from
    :return: None
    """
    self.tokens.clear()
    while True:
        flag = int.from_bytes(file.read(1), "big")
        if flag == 0:
            self.tokens.append(stl.Token((stl.EOF, None)))
            break
        line = int(stl.read_string(file))
        file_name = stl.read_string(file)
        lf = line, file_name
        if flag == 1:
            token = stl.NumToken(lf, stl.read_string(file))
        elif flag == 2:
            token = stl.LiteralToken(lf, stl.read_string(file))
        elif flag == 3:
            token = stl.IdToken(lf, stl.read_string(file))
        elif flag == 4:
            token = stl.DocToken(lf, stl.read_string(file))
        else:
            raise stl.ParseException("Unknown flag: {}".format(flag))
        self.tokens.append(token)
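# Hedged sketch of the inverse operation, assuming stl.read_string reads a
# string written by a hypothetical write_string helper. A serializer for an
# IdToken (flag 3) would emit, per record:
#     file.write((3).to_bytes(1, "big"))    # one-byte type flag
#     write_string(file, str(token.line))   # line number, stored as a string
#     write_string(file, token.file)        # source file name
#     write_string(file, token.symbol)      # token payload
# and finish the stream with a single zero byte (the EOF record).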
def tokenize(self, source):
    """
    Tokenizes the spl source code into a list of tokens, stored in the
    memory of this Lexer.

    :param source: the source code, either an opened file or a list of lines
    :return: None
    """
    self.tokens.clear()
    if self.import_lang and self.file_name[-7:] != "lang.sp":
        # implicitly prepend `import namespace "lang"` to every file
        # except the language library itself
        self.tokens += [
            stl.IdToken(LINE_FILE, "import"),
            stl.IdToken(LINE_FILE, "namespace"),
            stl.LiteralToken(LINE_FILE, "lang")
        ]
        self.find_import(0, 3)
    if isinstance(source, list):
        self.tokenize_text(source)
    else:
        self.tokenize_file(source)
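# Illustrative behavior: with import_lang enabled, a file "foo.sp" is
# tokenized as if it began with the line `import namespace "lang"`; the
# three pre-inserted tokens are resolved immediately by find_import(0, 3)
# before the file's own content is tokenized.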
def find_import(self, from_, to):
    """
    Looks for an import statement within the given slice of the token list.

    :param from_: the beginning position of the search
    :param to: the end position of the search
    :return: None
    """
    for i in range(from_, to):
        token = self.tokens[i]
        if isinstance(token, stl.IdToken) and token.symbol == "import":
            next_token: stl.Token = self.tokens[i + 1]
            namespace_token = None
            if isinstance(next_token, stl.IdToken) and \
                    next_token.symbol == "namespace":
                namespace_token = next_token
                self.tokens.pop(i + 1)
                path_token: stl.LiteralToken = self.tokens[i + 1]
            elif isinstance(next_token, stl.LiteralToken):
                path_token = next_token
            else:
                raise stl.ParseException(
                    "Unexpected token in file '{}', at line {}".format(
                        next_token.file, next_token.line))

            name = path_token.text
            if name[-3:] == ".sp":  # user library
                if len(self.script_dir) == 0:
                    file_name = name  # already ends with ".sp"
                else:
                    file_name = self.script_dir + "/" + name
                if "/" in name:
                    import_name = name[name.rfind("/") + 1:-3]
                else:
                    import_name = name[:-3]
            else:  # system library
                file_name = "{}{}lib{}{}.sp".format(
                    self.spl_path, os.sep, os.sep, name)
                import_name = name

            if len(self.tokens) > i + 2:
                as_token: stl.IdToken = self.tokens[i + 2]
                if as_token.symbol == "as":
                    if namespace_token is not None:
                        raise stl.ParseException(
                            "Unexpected combination "
                            "'import namespace ... as ...'")
                    name_token: stl.IdToken = self.tokens[i + 3]
                    import_name = name_token.symbol
                    self.tokens.pop(i + 1)  # remove the path token
                    self.tokens.pop(i + 1)  # remove the 'as' token
            # remove the remaining token of the import clause: the alias
            # name after 'as', or the path token otherwise
            self.tokens.pop(i + 1)
            self.import_file(file_name, import_name)
            if namespace_token:
                lf = namespace_token.line, namespace_token.file
                self.tokens.append(namespace_token)
                self.tokens.append(stl.IdToken(lf, import_name))
                self.tokens.append(stl.IdToken(lf, stl.EOL))
            break
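# Hedged before/after sketch (token values only) for a hypothetical
# `import "utils.sp" as u`:
#     before: import "utils.sp" as u
#     after:  import u <resolved path> { ...tokens of utils.sp... }
# For `import namespace "x"`, the namespace token popped above is
# re-appended after the braced block, followed by the import name and an
# EOL token, so the parser can bring the namespace into scope.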