class TSParser: def __init__(self, lang: str): self.lang = lang self.parser = TSBaseParser() self.tsLang = Language(LIBRARY_PATH, lang) self.parser.set_language(self.tsLang) def parse(self, code: str) -> Node: return self(code).root_node def sexp(self, code: str) -> str: return self.parse(code).sexp() def query(self, query: str, code: str) -> dict[str, str]: return dict((k, extract(v, code)) for v, k in self.tsLang.query( query).captures(self.parse(code))) def __call__(self, value: str) -> Tree: return self.parser.parse(bytes(value, "utf8"))
#!/usr/bin/env python3 from tree_sitter import Language, Parser from pathlib import Path import pkg_resources LANGUAGE = Language(next(Path(__file__).parent.glob("binding.*.so")), "minizinc") HIGHLIGHT_QUERY = LANGUAGE.query( pkg_resources.resource_string(__name__, "queries/highlights.scm")) try: from pygments.lexer import Lexer from pygments import token class TreeSitterLexer(Lexer): ts_alias = { "comment": token.Comment, "type.builtin": token.Name.Builtin, "punctuation.delimiter": token.Punctuation, "function": token.Name.Function, "keyword": token.Keyword, "operator": token.Operator, "punctuation.bracket": token.Punctuation, "number": token.Number, "string": token.String, "escape": token.String.Escape, "constant.builtin": token.Generic, "variable": token.Name.Variable, }
class BaseParser(object): """ Base parser exposes the common interface that we extend per language """ def __init__(self, language: str, query_class_name: str, query_file_path: str, library_loc: str = None): if os.getenv("TS_LIB_PATH") is not None and library_loc is None: library_loc = os.getenv("TS_LIB_PATH") if not library_loc: raise ParserLibraryNotFoundError( "Parser library path is 'None'. Please either set up the environment or call the constructor with the path" ) if not Path(library_loc).exists() or not Path(library_loc).is_file(): raise ParserLibraryNotFoundError( f"Parser library '{library_loc}' not found. Did you set up the environement variables?" ) self.language = Language(library_loc, language) self.parser = Parser() self.parser.set_language(self.language) self.qclass = Query.fromFile(query_file_path) self.QUERIES = self.qclass[query_class_name] def _run_query_and_get_captures(self, q_name: str, root_node: Node) -> List: """ Runs a query on the the language and returns the raw spans. Sometimes you may need to do to some specilized processing on the returned lines Check languane specific codes to see what that can be. """ query = self.language.query(self.QUERIES[q_name]) captures = query.captures(root_node) return captures def parse_file(self, file_path: str): """ Parses a single file and retunrs True if success """ if not Path(file_path).exists() or not Path(file_path).is_file(): raise SourceFileNotFoundError(f"Source file {file_path} not found") try: with open(file_path, encoding='utf-8') as f: blob = f.read() except UnicodeDecodeError: return False self.raw_code = blob self.splitted_code = blob.split("\n") self.tree: Tree = self.parser.parse(bytes(blob.encode('utf-8'))) self.root_node: Node = self.tree.root_node return True def parse_code_as_string(self, blob: str): """ parse_code_as_string provides the same functions as parse_file but takes string blob as code instead of a file location. """ try: self.raw_code = blob self.splitted_code = blob.decode('utf-8').split("\n") self.tree: Tree = self.parser.parse(bytes(blob.encode('utf-8'))) self.root_node: Node = self.tree.root_node return True except UnicodeDecodeError: return False except AttributeError: self.raw_code = blob self.splitted_code = blob.split("\n") self.tree: Tree = self.parser.parse(bytes(blob.encode('utf-8'))) self.root_node: Node = self.tree.root_node return True def sexp(self): return self.tree.root_node.sexp() def reload_queries(self): """ Reloads the query file and the internal data structure. This is a temporary method and it should not be a part of the API. mainly to dynamically reload the queries while in development for IPython console """ self.qclass.reload() def _has_children(self, node: Node) -> bool: return len(node.children) > 0 def _walk_recursive(self, cursor: TreeCursor, apply): apply(cursor.node) for ch in cursor.node.children: self._walk_recursive(ch.walk(), apply) def _reduce_recursive(self, cursor: TreeCursor, reduction, accumulator): acc = accumulator for ch in cursor.node.children: acc = self._reduce_recursive(ch.walk(), reduction, reduction(ch, acc)) return acc def walk(self, apply): """ Iterate over the tree of the parsed code and apply the given """ self._walk_recursive(self.root_node.walk(), apply) def reduction(self, reduction, neutral): """ Apply a reduction operation over the tree of the parsed code """ return self._reduce_recursive(self.root_node.walk(), reduction, reduction(self.root_node, neutral))