def __init__(self, filename, content, keywords):
    self.logger = Logger(self, 'lexer')
    self.filename = filename
    self._content = content
    self._lexems = []
    self._keywords = keywords
    self.stats = collections.defaultdict(int)
    self.tokens = []
    self.tokenIndex = 0
    self.genTokens()
    self.numTokens = len(self.tokens)
class DB(object): """ """ logger = Logger(__name__) def import_a2l(self, file_name, debug=False): """ Parameters ---------- file_name: str Name of the A2L to be imported. If you don't specify an extension ``.a2l`` is added. Returns ------- SQLAlchemy session object. Note ---- ``AML`` and ``IF_DATA`` sections are currently not processed. """ from os import unlink from pya2l.a2l_listener import ParserWrapper, A2LListener, cut_a2ml parser = ParserWrapper('a2l', 'a2lFile', A2LListener, debug=debug) self._set_path_components(file_name) try: unlink(self._dbfn) except Exception: pass data = open(self._a2lfn).read() data, a2ml = cut_a2ml(data) self.session = parser.parseFromString(data, dbname=self._dbfn) return self.session def export_a2l(self, file_name): """ """ self._set_path_components(file_name) raise NotImplementedError("Export functionality not implemented yet.") def open_existing(self, file_name): """ """ self._set_path_components(file_name) if not path.exists(self._dbfn): return None else: self.db = model.A2LDatabase(self._dbfn) self.session = self.db.session res = self.session.query(model.MetaData).first() if res: return self.session else: return None def _set_path_components(self, file_name): """ """ from os import path self._pth, self._base = path.split(file_name) fbase, ext = path.splitext(self._base) self._dbfn = "{}.a2ldb".format(fbase) if not ext or ext.lower() == ".a2l" or ext.lower() == ".a2ldb": self._a2lfn = "{}.a2l".format(fbase) else: self._a2lfn = "{}{}".format(fbase, ext)
class DB(object): """ """ logger = Logger(__name__) def import_a2l(self, file_name, debug=False, in_memory=False, remove_existing=False): """Import `.a2l` file to `.a2ldb` database. Parameters ---------- file_name: str Name of the A2L to be imported. If you don't specify an extension ``.a2l`` is added. debug: bool Additional debugging output. in_memory: bool Create non-persistent in-memory database. remove_existing: bool ** DANGER ZONE **: Remove existing database. Returns ------- SQLAlchemy session object. Raises ------ OSError If database already exists. Note ---- ``AML`` and ``IF_DATA`` sections are currently not processed. """ from os import unlink from pya2l.a2l_listener import ParserWrapper, A2LListener, cut_a2ml self.in_memory = in_memory parser = ParserWrapper('a2l', 'a2lFile', A2LListener, debug=debug) self._set_path_components(file_name) if not in_memory: if remove_existing: try: unlink(self._dbfn) except Exception: pass elif path.exists(self._dbfn): raise OSError("file '{}' already exists.".format(self._dbfn)) data = open(self._a2lfn).read() data, a2ml = cut_a2ml(data) self.session = parser.parseFromString(data, dbname=self._dbfn) return self.session def export_a2l(self, file_name, in_memory): """ """ self.in_memory = in_memory self._set_path_components(file_name) raise NotImplementedError("Export functionality not implemented yet.") def open_existing(self, file_name): """Open an existing `.a2ldb` database. Parameters ---------- file_name: str Name of your database file, resulting from :meth:`import_a2l`. Extension `.a2ldb` not needed. Returns ------- SQLAlchemy session object. Raises ------ OSError If database already exists. """ self.in_memory = False self._set_path_components(file_name) if not path.exists(self._dbfn): raise OSError("file '{}' does not exists.".format(self._dbfn)) else: self.db = model.A2LDatabase(self._dbfn) self.session = self.db.session res = self.session.query(model.MetaData).first() if res: return self.session else: raise InvalidA2LDatabase( "Database seems to be corrupted. No meta-data found.") def _set_path_components(self, file_name): """ """ self._pth, self._base = path.split(file_name) fbase, ext = path.splitext(self._base) if self.in_memory: self._dbfn = ":memory:" else: self._dbfn = path.join(self._pth, "{}.a2ldb".format(fbase)) if not ext or ext.lower() == ".a2l" or ext.lower() == ".a2ldb": self._a2lfn = "{}.a2l".format(fbase) else: self._a2lfn = "{}{}".format(fbase, ext) self._a2lfn = path.join(self._pth, self._a2lfn)
def __init__(self, tree):
    self.logger = Logger(self, 'A2LParser')
    self.tree = tree
    self.parser = tree.parser
    self.level = 0
    self.blockStack = []
def __init__(self):
    self.logger = Logger(self, 'parser')
class DB(object): """""" A2L_TEMPLATE = pkgutil.get_data("pya2l.cgen.templates", "a2l.tmpl") logger = Logger(__name__) def import_a2l( self, file_name, debug=False, in_memory=False, remove_existing=False, encoding="latin-1", ): """Import `.a2l` file to `.a2ldb` database. Parameters ---------- file_name: str Name of the A2L to be imported. If you don't specify an extension ``.a2l`` is added. debug: bool Additional debugging output. in_memory: bool Create non-persistent in-memory database. remove_existing: bool ** DANGER ZONE **: Remove existing database. Returns ------- SQLAlchemy session object. Raises ------ OSError If database already exists. Note ---- ``AML`` and ``IF_DATA`` sections are currently not processed. """ from os import unlink from pya2l.a2l_listener import A2LListener, cut_a2ml from pya2l.parserlib import ParserWrapper self.in_memory = in_memory parser = ParserWrapper("a2l", "a2lFile", A2LListener, debug=debug) self._set_path_components(file_name) if not in_memory: if remove_existing: try: unlink(self._dbfn) except Exception: pass elif path.exists(self._dbfn): raise OSError("file '{}' already exists.".format(self._dbfn)) data = open(self._a2lfn, encoding=encoding).read() data, a2ml = cut_a2ml(data) self.session = parser.parseFromString(data, dbname=self._dbfn) return self.session def export_a2l(self, file_name=sys.stdout, encoding="ascii"): """""" namespace = dict(session=self.db.session, model=model) data = doTemplateFromText( self.A2L_TEMPLATE, namespace, formatExceptions=False, encoding=encoding ) result = [] for line in data.splitlines(): line = line.rstrip() if not line: continue else: result.append(line) result = "\n".join(result) print(result) # with io.open("{}.render".format(file_name), "w", encoding = encoding, newline = "\r\n") as outf: # outf.write(res) def open_create(self, file_name): """Open or create an A2LDB.""" self.in_memory = False self._set_path_components(file_name) if not path.exists(self._dbfn): return self.import_a2l(self._a2lfn) else: return self.open_existing(self._dbfn) def open_existing(self, file_name): """Open an existing `.a2ldb` database. Parameters ---------- file_name: str Name of your database file, resulting from :meth:`import_a2l`. Extension `.a2ldb` not needed. Returns ------- SQLAlchemy session object. Raises ------ OSError If database already exists. """ self.in_memory = False self._set_path_components(file_name) if not path.exists(self._dbfn): raise OSError("file '{}' does not exists.".format(self._dbfn)) else: self.db = model.A2LDatabase(self._dbfn) self.session = self.db.session res = self.session.query(model.MetaData).first() if res: return self.session else: raise InvalidA2LDatabase( "Database seems to be corrupted. No meta-data found." ) def _set_path_components(self, file_name): """""" self._pth, self._base = path.split(file_name) fbase, ext = path.splitext(self._base) if self.in_memory: self._dbfn = ":memory:" else: self._dbfn = path.join(self._pth, "{}.a2ldb".format(fbase)) if not ext or ext.lower() == ".a2l" or ext.lower() == ".a2ldb": self._a2lfn = "{}.a2l".format(fbase) else: self._a2lfn = "{}{}".format(fbase, ext) self._a2lfn = path.join(self._pth, self._a2lfn)
class MixInBase:

    logger = Logger(__name__)
class Tokenizer(object):

    TOKENS = re.compile(r"""
          \s*"(?P<STRING>[^"]*?)"
        | \s*(?P<IDENT>[a-zA-Z_][a-zA-Z_0-9.|]*)
        | \s*(?P<BEGIN>/begin)
        | \s*(?P<END>/end)
        | \s*(?P<NUMBER>
              (0(x|X)?[0-9a-fA-F]+)
            | ((\+ | \-)?\d+)
          )
        """, re.VERBOSE | re.DOTALL)

    def __init__(self, filename, content, keywords):
        self.logger = Logger(self, 'lexer')
        self.filename = filename
        self._content = content
        self._lexems = []
        self._keywords = keywords
        self.stats = collections.defaultdict(int)
        self.tokens = []
        self.tokenIndex = 0
        self.genTokens()
        self.numTokens = len(self.tokens)

    def __del__(self):
        pass

    def lexer(self, line, stringDelimiter='"', splitBy=None):
        """Split a line into tokens while considering delimited strings."""
        head, sep, tail = line.partition(stringDelimiter)
        if sep:
            result = []
            if head:
                result.extend(head.split(splitBy))
            head, sep, tail = tail.partition(stringDelimiter)
            result.append("%(sep)s%(value)s%(sep)s" % {'value': head, 'sep': stringDelimiter})
            if tail:
                result.extend(self.lexer(tail))
            return result
        else:
            if head:
                result = head.split(splitBy)
            else:
                result = list()
            return result

    def makeToken(self, lexem):
        tokenType = None
        if lexem.startswith('"') and lexem.endswith('"'):
            tokenType = TokenType.STRING
            lexem = lexem.strip('"')
            self.stats[TokenType.STRING] += 1
        elif lexem.isdigit():
            tokenType = TokenType.NUMBER
            lexem = int(lexem)
            self.stats[TokenType.NUMBER] += 1
        elif lexem.startswith('0x') or lexem.startswith('0X'):
            if HEX_NUMBER.match(lexem[2:]):     # Look before you leap.
                tokenType = TokenType.HEX_NUMBER
                lexem = int(lexem[2:], 16)
            else:
                tokenType = TokenType.IDENT
                self.checkIdentifier(lexem)
            self.stats[TokenType.HEX_NUMBER] += 1
        elif lexem.lower() == '/begin':
            tokenType = TokenType.BEGIN
        elif lexem.lower() == '/end':
            tokenType = TokenType.END
        elif lexem in self._keywords:
            tokenType = TokenType.KEYWORD
            self.stats[TokenType.KEYWORD] += 1
        else:
            # Anything starting with a digit or a sign is tried as a float first;
            # everything else falls back to an identifier.
            if lexem[0].isdigit() or lexem[0] in ('+', '-'):
                try:
                    lexem = float(lexem)
                    tokenType = TokenType.FLOAT
                    self.stats[TokenType.FLOAT] += 1
                except ValueError:
                    tokenType = TokenType.IDENT
                    self.checkIdentifier(lexem)
            else:
                tokenType = TokenType.IDENT
                self.checkIdentifier(lexem)
        return (tokenType, lexem)

    def genTokens(self):
        lineEnumerator = enumerate(self._content.splitlines(), 1)
        for lineNo, line in lineEnumerator:
            self.lineNo = lineNo
            match = BEGIN_AML.search(line)
            if match:
                # Capture the complete AML section as a single token.
                start, end = match.span()
                savedLine = line[:start]
                result = [line[start:end]]
                while True:
                    self.lineNo, line = next(lineEnumerator)
                    result.append(line)
                    match = END_AML.search(line)
                    if match:
                        break
                aml = ''.join(result)
                self.tokens.append((self.lineNo, (TokenType.AML, aml)))
                line = savedLine
            lexems = self.lexer(line.strip())
            if lexems == []:
                continue
            for lexem in lexems:
                token = self.makeToken(lexem)
                if token[0] is None:
                    print("*** '%s' (line %u) does not match." % (lexem, self.lineNo))
                self.tokens.append((self.lineNo, token))

    def tokenAvailable(self):
        return self.tokenIndex < self.numTokens

    def getToken(self):
        token = self.tokens[self.tokenIndex]
        self.tokenIndex += 1
        return token

    def peekToken(self):
        token = self.tokens[self.tokenIndex]
        return token

    def stepBack(self, count=1):
        self.tokenIndex -= count

    def checkIdentifier(self, identifier):
        for item in identifier.split('.'):  # Identifiers can be hierarchical.
            if not IDENTIFIER.match(item):
                self.logger.warn("Part '{1}' of identifier '{0}' is not a valid C-identifier.".format(identifier, item))
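# Hedged sketch of driving the Tokenizer above directly on a small A2L fragment.
# The keyword set is assumed to come from classes.KEYWORD_MAP, exactly as the
# A2LParser below supplies it; the content string is illustrative.
content = '/begin PROJECT Demo "demo project" /end PROJECT'
tokenizer = Tokenizer("<<buffer>>", content, keywords=set(classes.KEYWORD_MAP.keys()))
while tokenizer.tokenAvailable():
    lineNo, (tokenType, lexem) = tokenizer.getToken()
    print(lineNo, tokenType, lexem)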
class A2LParser(object):

    def __init__(self):
        self.logger = Logger(self, 'parser')

    def parseFromFileName(self, filename):
        fp = codecs.open(filename, encoding="utf")
        self.parse(fp)

    def parseFromString(self, stringObj):
        stringBuffer = createStringBuffer(stringObj)
        self.parse(stringBuffer)

    def parse(self, fp):
        keywords = classes.KEYWORD_MAP.keys()
        self.filename = fp.name if hasattr(fp, 'name') else "<<buffer>>"
        returnCode, source = self.uncomment(fp)
        if not returnCode:
            return
        source = ''.join(source)
        tokenizer = Tokenizer(self.filename, source, keywords)
        classStack = []
        classStack.append(classes.RootElement)
        instanceStack = []
        instanceStack.append(classes.instanceFactory("Root"))
        pushToInstanceStack = False
        self.lineNo = None
        while tokenizer.tokenAvailable():
            self.lineNo, (tokenType, lexem) = tokenizer.getToken()
            if tokenType == TokenType.AML:
                parserWrapper = aml.ParserWrapper('aml', 'amlFile')
                tree = parserWrapper.parseFromString(lexem)
                parseAml(tree.value)
                continue
            else:
                print("[%s]%s:%s" % (tokenType, lexem, self.lineNo))
            if tokenType == TokenType.BEGIN:
                self.lineNo, (tokenType, lexem) = tokenizer.getToken()  # Move on.
                pushToInstanceStack = True
                klass = classes.KEYWORD_MAP.get(lexem)
                classStack.append(klass)
            elif tokenType == TokenType.END:
                self.lineNo, (tokenType, lexem) = tokenizer.getToken()  # Move on.
                classStack.pop()
                instanceStack.pop()
                continue
            elif tokenType == TokenType.KEYWORD:
                klass = classes.KEYWORD_MAP.get(lexem)
            if classStack:
                tos = classStack[-1]
            if tokenType in (TokenType.BEGIN, TokenType.KEYWORD):
                fixedAttributes = klass.fixedAttributes
                variableAttribute = klass.variableAttribute
                numParameters = len(fixedAttributes)
                parameters = [tokenizer.getToken() for _ in range(numParameters)]
                attributeValues = [x[1][1] for x in parameters]
                inst = classes.instanceFactory(lexem.title(), **OrderedDict(zip(fixedAttributes, attributeValues)))
                if variableAttribute:
                    attr = klass[variableAttribute]
                    result = []
                    while True:
                        self.lineNo, (tokenType, lexem) = tokenizer.getToken()
                        print(tokenType, lexem)
                        if tokenType in (TokenType.KEYWORD, TokenType.END):
                            tokenizer.stepBack()
                            break
                        result.append(lexem)
                    setattr(inst, attr[1], result)
                    inst.attrs.append(attr[1])
                elif tokenType == TokenType.KEYWORD and lexem in ('COMPU_TAB', 'COMPU_VTAB', 'COMPU_VTAB_RANGE'):
                    #
                    # COMPU_TAB / COMPU_VTAB / COMPU_VTAB_RANGE require special attention.
                    #
                    attribute = "Items"
                    if lexem == 'COMPU_VTAB_RANGE':
                        sliceLength = 3
                        valueClass = classes.CompuTriplet
                        variablePart = [tokenizer.getToken() for _ in range(inst.NumberValueTriples * sliceLength)]
                    else:
                        valueClass = classes.CompuPair
                        sliceLength = 2
                        variablePart = [tokenizer.getToken() for _ in range(inst.NumberValuePairs * sliceLength)]
                    variablePartValues = [v[1][1] for v in variablePart]
                    result = slicer(variablePartValues, sliceLength, valueClass)
                    inst.attrs.append(attribute)
                    setattr(inst, attribute, result)
                # print(inst)
                instanceStack[-1].children.append(inst)
                if pushToInstanceStack:
                    instanceStack.append(inst)
                    pushToInstanceStack = False

    def uncomment(self, fp):
        # Nested comments are not supported!
        result = []
        multiLineComment = False
        inComment = False
        returnCode = True
        for lineNo, line in enumerate(fp):  # Bad style state-machine...
            self.lineNo = lineNo
            if not multiLineComment:
                if '//' in line:
                    cmtPos = line.index('//')
                    line = line[:cmtPos].strip()
                    if line:
                        result.append(line)
                elif '/*' in line:
                    cmtPos = line.index('/*')
                    startLineNo = lineNo
                    if not '*/' in line:
                        multiLineComment = True
                        inComment = True
                    line = line[:cmtPos].strip()
                    if line:
                        result.append(line)
                else:
                    if line:
                        result.append(line)
            else:
                if '*/' in line:
                    cmtPos = line.index('*/')
                    result.append(line[cmtPos + 2:].strip())
                    multiLineComment = False
                    inComment = False
                elif '/*' in line:
                    if inComment:
                        self.logger.error("Nested comments are not allowed.")
                        returnCode = False
        if multiLineComment:
            self.logger.error("Premature end-of-file while processing comment.")
            returnCode = False
        return (returnCode, result)
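# Hedged usage sketch for the A2LParser above: comments are stripped by uncomment(),
# the remaining text is tokenized, and the token stream is folded into an instance
# tree rooted at the "Root" element. The file name is illustrative.
parser = A2LParser()
parser.parseFromFileName("example.a2l")
# or parse directly from an in-memory string buffer:
parser.parseFromString('/begin PROJECT Demo "demo project" /end PROJECT')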