def parse(self, header):
    """Parse an ARFF file header and populate this reader's state.

    Builds a pyparsing grammar for the ``@relation`` / ``@attribute`` /
    ``@data`` sections, runs it over *header*, and stores the relation
    name, the (possibly nested) attribute tree, and the line number at
    which the data section starts.

    :param header: the ARFF header text
    :raises HeaderError: wrapping any pyparsing ``ParseException``
    """
    comment = self._comment()
    # A value is either a quoted string (quotes removed) or a bare word.
    quoted = quotedString.copy().setParseAction(removeQuotes)
    string = quoted | Word(printables, excludeChars='{},%')
    enum_value = quotedString | Word(printables, excludeChars='{},%')
    # @relation <name>; a missing name falls back to 'default_name'.
    relation = (Suppress(CaselessLiteral("@relation")) +
                Optional(restOfLine, default='default_name')('rel_name')
                .setParseAction(lambda t: t.rel_name.strip()))
    relation_part = ZeroOrMore(comment) + relation + ZeroOrMore(comment)
    # Nominal type: "{v1, v2, ...}".  The leading Empty() injects the
    # ENUM marker token before the value list.
    nominal = (Empty().copy().setParseAction(lambda t: self.ENUM) +
               Suppress(Literal("{")) +
               Group(delimitedList(enum_value, delim=self._separator))
               ("next_arg").setParseAction(self.get_values) +
               Suppress(Literal("}")))
    # Date type with an optional format string.
    date = (CaselessLiteral("date") +
            Optional(CharsNotIn("{},\n"))("next_arg")
            .setParseAction(self._adapt_date_format))
    attributes_part = Forward()
    # Relational attributes recursively contain an attribute list,
    # terminated by "@end <name>".
    relational = (CaselessLiteral("relational") + attributes_part +
                  Suppress(CaselessLiteral("@end")) + string)
    attr_type = (CaselessLiteral("numeric") | CaselessLiteral("string") |
                 nominal | date | relational)("attr_type")
    attribute = (Suppress(CaselessLiteral("@attribute")) +
                 (string.copy())("attr_name") + attr_type)
    attribute_line = comment | attribute
    attributes_part << (Group(OneOrMore(attribute_line)))("children")
    # Record the line number of "@data" so reading can resume there.
    data_part = (CaselessLiteral("@data"))("data_start").setParseAction(
        lambda s, p, k: (lineno(p, s)))
    arff_header = relation_part + attributes_part + data_part
    attribute.setParseAction(self._create_attribute)
    try:
        result = arff_header.parseString(header, parseAll=True)
    except ParseException as e:
        raise HeaderError(FileType.ARFF, e.lineno, e.col, e.line, e)
    self._relation_name = result.rel_name
    self._find_relational(result.children)
    self._linearize_attrs(result.children)
    self._data_start = result.data_start
    self._index = 0
def init_parser(self):
    """Build the grammar for an interleaved sequence alignment.

    The format is a count header (species count, sequence length) followed
    by a block of "name bases" lines, then zero or more continuation
    blocks of bare base lines separated by blank lines — a phylip-style
    layout (assumption; confirm against callers).
    """
    INTEGER = Word(nums)
    INTEGER.setParseAction(lambda x: int(x[0]))
    # First line: <species_count> <sequence_length>, rest of line ignored.
    header = INTEGER("species_count") + INTEGER("sequence_length") +\
        Suppress(restOfLine)
    header.setParseAction(self.set_header)
    sequence_name = Word(
        alphas + nums + "!#$%&\'*+-./;<=>?@[\\]^_`{|}~",
        max=100)
    # Take a copy and disallow line breaks in the bases
    bases = self.BASES.copy()
    bases.setWhitespaceChars(" \t")
    # First block: each line is "<species name> <bases>".
    seq_start = sequence_name("species") + bases(
        "sequence") + Suppress(LineEnd())
    seq_start.setParseAction(self.set_seq_start)
    seq_start_block = OneOrMore(seq_start)
    seq_start_block.setParseAction(self.set_start_block)
    # Continuation blocks: bare base lines, preceded by a blank line.
    seq_continue = bases("sequence") + Suppress(LineEnd())
    seq_continue.setParseAction(self.set_seq_continue)
    seq_continue_block = Suppress(LineEnd()) + OneOrMore(seq_continue)
    seq_continue_block.setParseAction(self.set_continue_block)
    return header + seq_start_block + ZeroOrMore(seq_continue_block)
def __init__(self, cfg):
    """Build a parser for phylogenetic program output.

    Extracts likelihood, runtime, alpha, tree length, substitution rates
    and base/amino-acid frequencies via labelled-float parse actions.
    The labels ("Final GAMMA...", "alpha:", "Tree-Length:") suggest
    RAxML-style output — assumption; confirm against callers.

    :param cfg: configuration carrying datatype and data_layout
    :raises util.PartitionFinderError: on an unknown datatype
    """
    self.cfg = cfg
    if cfg.datatype == "protein":
        letters = _protein_letters
    elif cfg.datatype == "DNA":
        letters = _dna_letters
    elif cfg.datatype == "morphology":
        letters = "0123456789"
    else:
        log.error("Unknown datatype '%s', please check" % self.cfg.datatype)
        raise util.PartitionFinderError
    self.rate_indexes = self.cfg.data_layout.rate_indexes
    self.freq_indexes = self.cfg.data_layout.letter_indexes
    FLOAT = Word(nums + '.-').setParseAction(lambda x: float(x[0]))
    # One state letter (e.g. a single base or amino acid).
    L = Word(letters, exact=1)
    COLON = Suppress(":")
    LNL_LABEL = Regex("Final GAMMA.+:") | Literal("Likelihood:")
    TIME_LABEL = Regex("Overall Time.+:") | Regex("Overall Time.+tion ")
    ALPHA_LABEL = Literal("alpha:")
    TREE_SIZE_LABEL = Literal("Tree-Length:")

    def labeled_float(label):
        # Skip ahead to the label, drop it, and capture the float after it.
        return Suppress(SkipTo(label)) + Suppress(label) + FLOAT

    lnl = labeled_float(LNL_LABEL)
    lnl.setParseAction(self.set_lnl)
    seconds = labeled_float(TIME_LABEL)
    seconds.setParseAction(self.set_seconds)
    alpha = labeled_float(ALPHA_LABEL)
    alpha.setParseAction(self.set_alpha)
    tree_size = labeled_float(TREE_SIZE_LABEL)
    tree_size.setParseAction(self.set_tree_size)
    # LG4X output emits two extra lines that must be skipped if present.
    LG4X_LINE = "LG4X" + restOfLine
    lg4x = Optional(LG4X_LINE + LG4X_LINE)
    # e.g. "rate A <-> C: 1.234"
    rate = Suppress("rate") + L + Suppress("<->") + L + COLON + FLOAT
    rate.setParseAction(self.set_rate)
    rates = OneOrMore(rate)
    # e.g. "freq pi(A): 0.25"
    freq = Suppress("freq pi(") + L + Suppress("):") + FLOAT
    freq.setParseAction(self.set_freq)
    freqs = OneOrMore(freq)
    LGM_LINE = "LGM" + restOfLine
    rate_block = Optional(LGM_LINE) + rates + freqs
    rate_block.setParseAction(self.rate_block)
    # Just look for these things
    self.root_parser = seconds + lnl + alpha + tree_size +\
        lg4x + OneOrMore(rate_block)
def parse(date_string):
    """Parse *date_string* as a single date or a date range.

    A single date is an optional day-of-week, a day number with optional
    ordinal suffix, an optional month and an optional year.  A range is
    two such dates joined by '-', 'until' or 'to', optionally prefixed
    with 'From'.  Raises a pyparsing exception on trailing input
    (``stringEnd`` is required).
    """
    # Parser for individual dates
    days_of_week = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
                    'Saturday', 'Sunday', 'Mon', 'Tue', 'Wed', 'Thurs',
                    'Fri', 'Sat', 'Sun']
    # Ordinal suffixes ("1st", "2nd", ...) are matched but discarded.
    suffixes = Literal('nd') | Literal('rd') | Literal('st') | Literal('th')
    day_of_month = Group(Word(nums) +
                         Suppress(Optional(suffixes))).setResultsName('day')
    single_date = Optional(DateParser._build_literal(days_of_week)).setResultsName('dow') + day_of_month + \
        Optional(DateParser._build_literal(LONG_MONTHS + SHORT_MONTHS)).setResultsName('month') + \
        Optional(Word(nums)).setResultsName('year')
    single_date.setParseAction(SingleDate)
    # Parser for date ranges
    date_range_separators = DateParser._build_literal(['-', 'until', 'to'])
    date_range = Suppress(Optional('From')) + single_date.setResultsName('start_date') + \
        Suppress(date_range_separators) + single_date.setResultsName('end_date')
    date_range.setParseAction(DateRange)
    # Try the range first, since a range begins with a valid single date.
    date_parser = (date_range | single_date) + stringEnd
    result = date_parser.parseString(date_string)
    return result
def __init__(self, cfg):
    """Build a parser for phylogenetic program output (protein/DNA only).

    Near-duplicate of the morphology-aware variant elsewhere in this
    project: extracts likelihood, runtime, alpha, tree length, rates and
    frequencies.  The labels suggest RAxML-style output — assumption;
    confirm against callers.

    :param cfg: configuration carrying datatype and data_layout
    :raises util.PartitionFinderError: on an unknown datatype
    """
    self.cfg = cfg
    if cfg.datatype == "protein":
        letters = _protein_letters
    elif cfg.datatype == "DNA":
        letters = _dna_letters
    else:
        log.error("Unknown datatype '%s', please check" % self.cfg.datatype)
        raise util.PartitionFinderError
    self.rate_indexes = self.cfg.data_layout.rate_indexes
    self.freq_indexes = self.cfg.data_layout.letter_indexes
    FLOAT = Word(nums + '.-').setParseAction(lambda x: float(x[0]))
    # One state letter (a single base or amino acid).
    L = Word(letters, exact=1)
    COLON = Suppress(":")
    LNL_LABEL = Regex("Final GAMMA.+:") | Literal("Likelihood:")
    TIME_LABEL = Regex("Overall Time.+:") | Regex("Overall Time.+tion ")
    ALPHA_LABEL = Literal("alpha:")
    TREE_SIZE_LABEL = Literal("Tree-Length:")

    def labeled_float(label):
        # Skip ahead to the label, drop it, and capture the float after it.
        return Suppress(SkipTo(label)) + Suppress(label) + FLOAT

    lnl = labeled_float(LNL_LABEL)
    lnl.setParseAction(self.set_lnl)
    seconds = labeled_float(TIME_LABEL)
    seconds.setParseAction(self.set_seconds)
    alpha = labeled_float(ALPHA_LABEL)
    alpha.setParseAction(self.set_alpha)
    tree_size = labeled_float(TREE_SIZE_LABEL)
    tree_size.setParseAction(self.set_tree_size)
    # LG4X output emits two extra lines that must be skipped if present.
    LG4X_LINE = "LG4X" + restOfLine
    lg4x = Optional(LG4X_LINE + LG4X_LINE)
    # e.g. "rate A <-> C: 1.234"
    rate = Suppress("rate") + L + Suppress("<->") + L + COLON + FLOAT
    rate.setParseAction(self.set_rate)
    rates = OneOrMore(rate)
    # e.g. "freq pi(A): 0.25"
    freq = Suppress("freq pi(") + L + Suppress("):") + FLOAT
    freq.setParseAction(self.set_freq)
    freqs = OneOrMore(freq)
    LGM_LINE = "LGM" + restOfLine
    rate_block = Optional(LGM_LINE) + rates + freqs
    rate_block.setParseAction(self.rate_block)
    # Just look for these things
    self.root_parser = seconds + lnl + alpha + tree_size +\
        lg4x + OneOrMore(rate_block)
def get_parser(self, EXPRESSION):
    """Return a parser matching a call to this function.

    The grammar is ``name ( EXPRESSION , EXPRESSION , ... )`` with exactly
    ``self._n_args`` arguments; this object is installed as the parse
    action so matches are converted by calling it.
    """
    # First argument follows the opening parenthesis; each of the
    # remaining self._n_args - 1 arguments is preceded by a comma.
    parser = Suppress(self._name) + Suppress('(') + EXPRESSION
    for _ in range(self._n_args - 1):
        parser = parser + Suppress(',') + EXPRESSION
    parser = parser + Suppress(')')
    parser.setName('Function({name})'.format(name=self._name))
    parser.setParseAction(self)
    return parser
def get_parser(self, EXPRESSION):
    """Return a parser for this named constant, or ``None``.

    Only string-valued constants are parseable; this object is installed
    as the parse action so matches are converted by calling it.
    """
    if not isinstance(self.value, str):
        # TODO Detect constants?
        return None
    parser = Suppress(self.value)
    parser.setName('NamedConstant({value})'.format(value=self.value))
    parser.setParseAction(self)
    return parser
def setup(self):
    """Build the natural-language time-expression parser.

    Recognises three forms: "<N|modifier> <unit> ago", "in the last/past
    <N|modifier> <unit>", and special keywords (e.g. "yesterday").
    Parse actions convert matches into time values.
    """
    # some expressions that will be reused
    units = get_match_first([Keyword(unit) for unit in time_units])
    units = units.setResultsName("unit")
    units.setParseAction(lambda s, l, tok: time_units[tok[0]])

    multiplier = Word(nums)
    multiplier = multiplier.setResultsName("multiply")
    multiplier.setParseAction(self.parseMulti)

    adder = get_match_first([CL(add) for add in add_modifiers])
    adder = adder.setResultsName("add")
    adder.setParseAction(self.parseAdd)
    modifier = (multiplier | adder)  # + FollowedBy(units)

    # ago
    #
    # e.g 5 days ago
    #
    # BUG FIX: Word("ago") matched any run of the letters a/g/o (e.g.
    # "gag"); Keyword matches the literal word only.
    ago = Optional(modifier) + units + Suppress(Keyword("ago"))
    ago.setParseAction(self.parseAgo)

    # time range
    #
    # e.g in the last 10 days
    #
    # BUG FIX: same Word -> Keyword correction for "last"/"past".
    time_range = Suppress(Optional(
        CL("in the"))) + \
        Suppress(Keyword("last") | Keyword("past")) + \
        Optional(modifier) + \
        units
    time_range.setParseAction(self.parseRange)

    # special keyword handling
    #
    # e.g yesterday
    # only handles yesterday right now, maybe need to be modified to do
    # more
    special_expr = []
    for expr in special:
        special_expr.append(
            Keyword(expr).setParseAction(
                lambda s, l, tok: special[tok[0]]))
    special_expr = get_match_first(special_expr)
    special_expr = special_expr.setResultsName("unit")
    special_expr.setParseAction(self.parseAgo)

    parser = (special_expr | ago | time_range)
    return parser
def stream_query():
    """Grammar rule for a stream query.

    <stream-query> ::= 'SELECT' <stream-operation> '(' <table-window> ')'
    """
    from grammar.keywords import SELECT_KEYWORD
    from grammar.parsed import ParsedStreamQuery
    query = Suppress(SELECT_KEYWORD) + stream_term()
    query.setParseAction(ParsedStreamQuery)
    return query
class JenkinsFileParser:
    """Extract stage information from a Jenkinsfile using pyparsing.

    Parses the pipeline into a dict of active ('stage') and
    commented-out ('commented_stage') stages, including stages nested
    inside 'parallel' blocks.
    """

    STAGE_KEY = 'stage'            # results key for active stages
    COMMENTED_STAGE_KEY = 'commented_stage'  # results key for /* ... */ stages

    def __init__(self, filename='Jenkinsfile'):
        self.filename = filename
        self.create_grammar()

    def create_grammar(self):
        """Build the (mutually recursive) stage/parallel grammar."""
        # Skip everything up to the first 'stage' (optionally preceded by
        # '/*'), but do not stop at the enclosing 'stages' keyword.
        self.beg = SkipTo(LineStart() + Literal('/*')*(0, 1) +
                          Literal('stage'), ignore=Literal('stages'))
        self.block = Forward()
        self.parallel = Suppress('parallel') + self.nested(self.block)
        self.parallel.setParseAction(lambda t: t[0])
        self.environment = Suppress('environment') + self.nested()
        # A stage body either contains a parallel/environment section
        # (captured under 'parallel') or is an opaque brace block.
        self.stage_content = (
            self.nested((self.parallel | self.environment.suppress()),
                        'parallel')
            | self.nested().suppress()
        )
        self.stage = Group(
            Suppress('stage' + '(') +
            quotedString('stage_name').setParseAction(removeQuotes) +
            Suppress(')') + self.stage_content)(
            self.STAGE_KEY + '*'
        )
        self.commented_stage = Group(
            Suppress('/*') + self.stage +
            Suppress('*/'))(self.COMMENTED_STAGE_KEY + '*')
        self.any_stage = self.stage | self.commented_stage
        self.block << Group(self.parallel | self.any_stage)('block*')

    @staticmethod
    def nested(elem=None, name=None):
        """Brace-delimited block, ignoring '*/' inside; optionally named."""
        expr = nestedExpr('{', '}', content=elem, ignoreExpr=Literal('*/'))
        if name:
            return expr.setResultsName(name)
        return expr

    def evaluate_stages(self):
        """Parse self.filename and return the stage structure as a dict."""
        a = self.beg.suppress() + self.block[...]
        test = a.parseFile(self.filename)
        # print(test.asDict())
        # print(json.dumps(test.asDict(), indent=4))
        return test.asDict()

    def find_stage_by_name(self, name, content):
        """Return the first scanString hit for stage *name* in *content*.

        Raises StopIteration if no matching stage is found.
        """
        quoted_name = ((Literal('"') | Literal("'")).suppress() + name +
                       (Literal('"') | Literal("'")).suppress())
        # named_stage = Literal('/*')*(0, 1) + 'stage' + '(' + quoted_name + ')' + self.nested() + Literal('*/')*(0, 1)
        named_stage = 'stage' + '(' + quoted_name + ')' + self.nested()
        commented_named_stage = (Literal('/*') + 'stage' + '(' +
                                 quoted_name + ')' + self.nested() +
                                 Literal('*/'))
        return next((named_stage | commented_named_stage).scanString(content))
def expr_parser():
    """Parse atoms of the form ``<name>_<index> (<=|>) <number>``.

    The leading identifier is discarded; the index, relation and numeric
    threshold are combined into an LLTSignal.
    """
    number = stl.num_parser()
    underscore = Suppress(Literal("_"))
    index = Word(nums).setParseAction(lambda toks: int(toks[0]))
    # Map the comparison token onto the corresponding Relation member.
    rel_op = (Literal("<=") | Literal(">")).setParseAction(
        lambda toks: Relation.LE if toks[0] == "<=" else Relation.GT)
    signal = Suppress(Word(alphas)) + underscore + index + rel_op + number
    signal.setParseAction(lambda toks: LLTSignal(toks[0], toks[1], toks[2]))
    return signal
def parser():
    """Lazily build and cache (module-global ``_parser``) a pyparsing
    grammar for a subset of regular-expression syntax.

    Supported constructs: character classes ``[...]``, repetition
    (``{n}``, ``{m,n}``, ``*+?``), groups ``(...)`` with ``?:``/``?P``
    options, macros ``\\d \\w \\s \\Z``, escaped characters, ``.``,
    alternation ``|`` and concatenation.  Parse actions wrap matches in
    the corresponding AST node classes via ``create``.
    """
    global _parser
    if _parser is None:
        # Whitespace is significant inside a regex.
        ParserElement.setDefaultWhitespaceChars("")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lbrace = Literal("{")
        rbrace = Literal("}")
        lparen = Literal("(")
        rparen = Literal(")")
        reMacro = Suppress("\\") + oneOf(list("dwsZ"))
        # Any backslash-escape that is not one of the macros above.
        escapedChar = ~reMacro + Combine("\\" + oneOf(list(printables)))
        reLiteralChar = "".join(
            c for c in string.printable if c not in r"\[]{}().*?+|")
        reRange = Combine(lbrack.suppress() +
                          SkipTo(rbrack, ignore=escapedChar) +
                          rbrack.suppress())
        reLiteral = (escapedChar | oneOf(list(reLiteralChar)))
        reDot = Literal(".")
        repetition = (
            (lbrace + Word(nums).setResultsName("count") + rbrace) |
            (lbrace + Word(nums).setResultsName("minCount") + "," +
             Word(nums).setResultsName("maxCount") + rbrace) |
            oneOf(list("*+?")))
        reExpr = Forward()
        reGroup = (lparen.suppress() +
                   Optional(Literal("?").suppress() +
                            oneOf(list(":P"))).setResultsName("option") +
                   reExpr.setResultsName("expr") +
                   rparen.suppress())
        reTerm = (reLiteral | reRange | reMacro | reDot | reGroup)
        # Precedence: repetition binds tightest, then concatenation
        # (operator None = adjacency), then alternation.
        reExpr << operatorPrecedence(reTerm, [
            (repetition, 1, opAssoc.LEFT, create(Repetition)),
            (None, 2, opAssoc.LEFT, create(Sequence)),
            (Suppress('|'), 2, opAssoc.LEFT, create(Alternation)),
        ])
        reGroup.setParseAction(create(Group))
        reRange.setParseAction(create(Range))
        reLiteral.setParseAction(create(Character))
        reMacro.setParseAction(create(Macro))
        reDot.setParseAction(create(Dot))

        _parser = reExpr

    return _parser
def parser():
    """Lazily build and cache (module-global ``_parser``) a pyparsing
    grammar for a subset of regular-expression syntax.

    Duplicate of the other ``parser()`` in this source: character
    classes, repetition, groups, macros, escapes, ``.``, alternation and
    concatenation, with ``create``-wrapped AST parse actions.
    """
    global _parser
    if _parser is None:
        # Whitespace is significant inside a regex.
        ParserElement.setDefaultWhitespaceChars("")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lbrace = Literal("{")
        rbrace = Literal("}")
        lparen = Literal("(")
        rparen = Literal(")")
        reMacro = Suppress("\\") + oneOf(list("dwsZ"))
        # Any backslash-escape that is not one of the macros above.
        escapedChar = ~reMacro + Combine("\\" + oneOf(list(printables)))
        reLiteralChar = "".join(
            c for c in string.printable if c not in r"\[]{}().*?+|")
        reRange = Combine(lbrack.suppress() +
                          SkipTo(rbrack, ignore=escapedChar) +
                          rbrack.suppress())
        reLiteral = (escapedChar | oneOf(list(reLiteralChar)))
        reDot = Literal(".")
        repetition = ((lbrace + Word(nums).setResultsName("count") +
                       rbrace) |
                      (lbrace + Word(nums).setResultsName("minCount") +
                       "," + Word(nums).setResultsName("maxCount") +
                       rbrace) |
                      oneOf(list("*+?")))
        reExpr = Forward()
        reGroup = (lparen.suppress() +
                   Optional(Literal("?").suppress() +
                            oneOf(list(":P"))).setResultsName("option") +
                   reExpr.setResultsName("expr") +
                   rparen.suppress())
        reTerm = (reLiteral | reRange | reMacro | reDot | reGroup)
        # Precedence: repetition binds tightest, then concatenation
        # (operator None = adjacency), then alternation.
        reExpr << operatorPrecedence(reTerm, [
            (repetition, 1, opAssoc.LEFT, create(Repetition)),
            (None, 2, opAssoc.LEFT, create(Sequence)),
            (Suppress('|'), 2, opAssoc.LEFT, create(Alternation)),
        ])
        reGroup.setParseAction(create(Group))
        reRange.setParseAction(create(Range))
        reLiteral.setParseAction(create(Character))
        reMacro.setParseAction(create(Macro))
        reDot.setParseAction(create(Dot))
        _parser = reExpr
    return _parser
class Compiler:
    """Preprocess query source, substituting ``$var`` Python variables.

    ``$ident`` occurrences (not followed by an attribute access or a
    call) are replaced with ``%s`` placeholders and the collected names
    are appended as a ``%``-format argument tuple.  Python 2 code
    (``xrange``).
    """

    def __init__(self):
        self._pythonVar = None  # lazily-built parser; see pythonVar()
        self.varNames = []      # $-variable names collected during a transform
        Preprocess.setSocialiteModule(getModuleVar())

    def pythonVar(self):
        """Lazily build the parser matching ``$ident``.

        The negative lookahead rejects ``$x.attr`` and ``$x(...)`` forms.
        Import is deferred so pyparsing is only loaded when needed.
        """
        if not self._pythonVar:
            from pyparsing import (ParserElement, Word, alphas, alphanums,
                                   Literal, Suppress, FollowedBy)
            _ws = ' \t'
            # NOTE: this changes pyparsing's global default whitespace.
            ParserElement.setDefaultWhitespaceChars(_ws)
            ident = Word(alphas + "_", alphanums + "_")
            lparen = Literal("(")
            dot = Literal(".")
            dollar = Literal("$")
            self._pythonVar = Suppress(dollar) + ident + ~FollowedBy(
                (dot + ident) | lparen)
            self._pythonVar.setParseAction(self.onPythonVar)
        return self._pythonVar

    def compile(self, src):
        """Run the preprocessor over *src* and return the generated code."""
        gen = Preprocess.run(src)
        return gen

    def processPythonVars(self, query):
        """Replace ``$var`` references in *query* with ``%s`` placeholders.

        Returns the parenthesised query, with a trailing
        ``%<pass-vars>(names...)`` format expression when any ``$var``
        was found.  ``self.varNames`` is filled as a side effect of the
        transform and emptied again before returning.
        """
        query = '(' + query + ')'
        tmp = query
        # Cheap pre-check: only run the transform when a '$' is present.
        if tmp.find("$") >= 0:
            tmp = self.pythonVar().transformString(query)
            if self.varNames:
                query = ''.join([
                    tmp, "%" + getPassVarsFunc() + "(",
                    ','.join(self.varNames), ")"
                ])
            else:
                query = tmp
            # Clear the collected names for the next call.
            for i in xrange(len(self.varNames)):
                self.varNames.pop()
        return query

    def onPythonVar(self, inputStr, loc, tokens):
        """Parse action: record the variable name, substitute ``%s``."""
        varName = ''.join(tokens)
        self.varNames.append(varName)
        return "%s"
def detect_token(jade):
    """Parse one line of jade/pug-style markup and return the rendered
    pieces joined by spaces.

    Recognises doctype declarations, tags (element name plus #id/.class
    selectors and optional parenthesised attributes) and ``//`` /
    ``//-`` comments; each form is converted by its parse action.
    """
    doctype = LineStart() + oneOf('!!! doctype') + Optional(oneOf(
        '5 html xml' + ' default transitional strict frameset 1.1 basic mobile',
        True))
    doctype.setParseAction(parse_doctype)
    element_id = Suppress('#') + Word(alphanums + '_' + '-')
    element_class = Suppress('.') + Word(alphanums + '_' + '-')
    # Either an #id followed by classes, or classes with an optional #id.
    selectors = (element_id.setResultsName('element_id')
                 + ZeroOrMore(element_class).setResultsName('element_class')) \
        | (OneOrMore(element_class).setResultsName('element_class')
           + Optional(element_id).setResultsName('element_id'))
    selectors.setParseAction(parse_selectors)
    # Bare selectors imply a default element; otherwise name + selectors.
    element = selectors.setResultsName('selectors') \
        | (Word(alphas).setResultsName('element_name')
           + Optional(selectors).setResultsName('selectors'))
    element.setParseAction(parse_element)
    attribute = CharsNotIn('(' + ')')
    attributes = nestedExpr(content=attribute)
    tag = element.setResultsName('element') \
        + Optional(attributes).setResultsName('attributes')
    tag.setParseAction(parse_tag)
    # TODO: block-comment and conditional-comment
    unbuffered_comment = Suppress(Suppress('//-') + restOfLine)
    buffered_comment = Suppress('//') + restOfLine
    buffered_comment.setParseAction(parse_buffered_comment)
    # Order matters here, as buffered will pick up
    # unbuffered comments if set first
    comment = unbuffered_comment | buffered_comment
    source = doctype | tag | comment
    parsed = source.parseString(jade)
    return ' '.join(parsed)
# NOTE(review): stray triple-quote below — apparently opens a block that is
# closed later in the file; left untouched.
'''
def __init__(self, datatype):
    """Build a parser for RAxML output (see RaxmlError below).

    Extracts likelihood, runtime, alpha, tree length, substitution rates
    and state frequencies via labelled-float parse actions.

    :param datatype: "protein" or "DNA"
    :raises RaxmlError: on an unknown datatype
    """
    if datatype == "protein":
        letters = "ARNDCQEGHILKMFPSTWYV"
    elif datatype == "DNA":
        letters = "ATCG"
    else:
        log.error("Unknown datatype '%s', please check" % datatype)
        raise RaxmlError
    FLOAT = Word(nums + '.-').setParseAction(lambda x: float(x[0]))
    # One state letter (a single base or amino acid).
    L = Word(letters, exact=1)
    COLON = Suppress(":")
    LNL_LABEL = Regex("Final GAMMA.+:") | Literal("Likelihood:")
    TIME_LABEL = Regex("Overall Time.+:") | Regex("Overall Time.+tion ")
    ALPHA_LABEL = Literal("alpha:")
    TREE_SIZE_LABEL = Literal("Tree-Length:")

    def labeled_float(label):
        # Skip ahead to the label, drop it, and capture the float after it.
        return Suppress(SkipTo(label)) + Suppress(label) + FLOAT

    lnl = labeled_float(LNL_LABEL)
    lnl.setParseAction(self.set_lnl)
    seconds = labeled_float(TIME_LABEL)
    seconds.setParseAction(self.set_seconds)
    alpha = labeled_float(ALPHA_LABEL)
    alpha.setParseAction(self.set_alpha)
    tree_size = labeled_float(TREE_SIZE_LABEL)
    tree_size.setParseAction(self.set_tree_size)
    # e.g. "rate A <-> C: 1.234"
    rate = Suppress("rate") + L + Suppress("<->") + L + COLON + FLOAT
    rate.setParseAction(self.set_rate)
    rates = OneOrMore(rate)
    # e.g. "freq pi(A): 0.25"
    freq = Suppress("freq pi(") + L + Suppress("):") + FLOAT
    freq.setParseAction(self.set_freq)
    freqs = OneOrMore(freq)
    # Just look for these things
    self.root_parser = seconds + lnl + alpha + tree_size + rates + freqs
def parse(self, header):
    """Parse the header of an ARFF file and populate this reader's state.

    Duplicate of the other ARFF ``parse`` in this source: builds a
    grammar for ``@relation`` / ``@attribute`` / ``@data``, then stores
    relation name, attribute tree and data-start line number.

    :param header: the ARFF header text
    :raises HeaderError: wrapping any pyparsing ``ParseException``
    """
    comment = self._comment()
    # A value is either a quoted string (quotes removed) or a bare word.
    quoted = quotedString.copy().setParseAction(removeQuotes)
    string = quoted | Word(printables, excludeChars='{},%')
    enum_value = quotedString | Word(printables, excludeChars='{},%')
    # @relation <name>; a missing name falls back to 'default_name'.
    relation = (Suppress(CaselessLiteral("@relation")) +
                Optional(restOfLine, default='default_name')
                ('rel_name').setParseAction(lambda t: t.rel_name.strip()))
    relation_part = ZeroOrMore(comment) + relation + ZeroOrMore(comment)
    # Nominal type: "{v1, v2, ...}".  The leading Empty() injects the
    # ENUM marker token before the value list.
    nominal = (Empty().copy().setParseAction(lambda t: self.ENUM) +
               Suppress(Literal("{")) +
               Group(delimitedList(enum_value, delim=self._separator))
               ("next_arg").setParseAction(self.get_values) +
               Suppress(Literal("}")))
    # Date type with an optional format string.
    date = CaselessLiteral("date") + Optional(
        CharsNotIn("{},\n"))("next_arg").setParseAction(
        self._adapt_date_format)
    attributes_part = Forward()
    # Relational attributes recursively contain an attribute list,
    # terminated by "@end <name>".
    relational = CaselessLiteral(
        "relational") + attributes_part + Suppress(
        CaselessLiteral("@end")) + string
    attr_type = (CaselessLiteral("numeric") | CaselessLiteral("string") |
                 nominal | date | relational)("attr_type")
    attribute = Suppress(CaselessLiteral("@attribute")) + (
        string.copy())("attr_name") + attr_type
    attribute_line = comment | attribute
    attributes_part << (Group(OneOrMore(attribute_line)))("children")
    # Record the line number of "@data" so reading can resume there.
    data_part = (CaselessLiteral("@data"))("data_start").setParseAction(
        lambda s, p, k: (lineno(p, s)))
    arff_header = relation_part + attributes_part + data_part
    attribute.setParseAction(self._create_attribute)
    try:
        result = arff_header.parseString(header, parseAll=True)
    except ParseException as e:
        raise HeaderError(FileType.ARFF, e.lineno, e.col, e.line, e)
    self._relation_name = result.rel_name
    self._find_relational(result.children)
    self._linearize_attrs(result.children)
    self._data_start = result.data_start
    self._index = 0
class Compiler:
    """Preprocess query source, substituting ``$var`` Python variables.

    Duplicate of the other ``Compiler`` in this source: ``$ident``
    occurrences (not followed by attribute access or a call) become
    ``%s`` placeholders, with the collected names appended as a
    ``%``-format argument tuple.  Python 2 code (``xrange``).
    """

    def __init__(self):
        self._pythonVar = None  # lazily-built parser; see pythonVar()
        self.varNames = []      # $-variable names collected during a transform
        Preprocess.setSocialiteModule(getModuleVar())

    def pythonVar(self):
        """Lazily build the parser matching ``$ident``.

        The negative lookahead rejects ``$x.attr`` and ``$x(...)`` forms.
        Import is deferred so pyparsing is only loaded when needed.
        """
        if not self._pythonVar:
            from pyparsing import (ParserElement, Word, alphas, alphanums,
                                   Literal, Suppress, FollowedBy)
            _ws = ' \t'
            # NOTE: this changes pyparsing's global default whitespace.
            ParserElement.setDefaultWhitespaceChars(_ws)
            ident = Word(alphas+"_", alphanums+"_")
            lparen = Literal("(")
            dot = Literal(".")
            dollar = Literal("$")
            self._pythonVar = Suppress(dollar) + ident + ~FollowedBy(
                (dot+ident) | lparen)
            self._pythonVar.setParseAction(self.onPythonVar)
        return self._pythonVar

    def compile(self, src):
        """Run the preprocessor over *src* and return the generated code."""
        gen = Preprocess.run(src)
        return gen

    def processPythonVars(self, query):
        """Replace ``$var`` references in *query* with ``%s`` placeholders.

        Returns the parenthesised query, with a trailing
        ``%<pass-vars>(names...)`` format expression when any ``$var``
        was found.  ``self.varNames`` is filled as a side effect of the
        transform and emptied again before returning.
        """
        query = '('+query+')'
        tmp = query
        # Cheap pre-check: only run the transform when a '$' is present.
        if tmp.find("$") >= 0:
            tmp = self.pythonVar().transformString(query)
            if self.varNames:
                query = ''.join([tmp, "%"+getPassVarsFunc()+"(",
                                 ','.join(self.varNames), ")"])
            else:
                query = tmp
            # Clear the collected names for the next call.
            for i in xrange(len(self.varNames)):
                self.varNames.pop()
        return query

    def onPythonVar(self, inputStr, loc, tokens):
        """Parse action: record the variable name, substitute ``%s``."""
        varName = ''.join(tokens)
        self.varNames.append(varName)
        return "%s"
def ListParser():
    """Build a callable that parses list columns of value pairs.

    The returned function maps text like ``[(1, 2.5), (3, 4e-1)]`` to a
    list of ``(float, float)`` tuples; malformed input raises
    ``ValueError`` (wrapping the pyparsing error).
    """
    number = Regex(r'[-+]?[0-9]+(?:\.[0-9]*)?(?:e[-+]?[0-9]+)?', IGNORECASE)
    number.setParseAction(lambda toks: float(toks[0]))
    pair = Suppress('(') + number + Suppress(',') + number + Suppress(')')
    pair.setParseAction(tuple)
    pair_list = Suppress('[') + delimitedList(pair) + Suppress(']')
    pair_list.setParseAction(list)

    def parse(s):
        # Translate pyparsing failures into the ValueError callers expect.
        try:
            return pair_list.parseString(s).asList()
        except ParseBaseException as e:
            raise ValueError(e)

    return parse
def getEbnfParser(symbols):
    """Return an EBNF parser for the command language.

    NOTE: every parse action appends a ``(value, TokenType)`` pair to the
    caller-supplied *symbols* list — the token stream is produced as a
    side effect of parsing, not as the parse result.
    """
    identifier = Word(alphas + '_', alphanums + '_')
    # String literal: quotes are stripped before recording.
    string = quotedString.setParseAction(
        lambda t: symbols.append((t[0][1:-1], TokenType.StrLit))
    )
    integer = Word(nums).setParseAction(
        lambda t: symbols.append((int(t[0]), TokenType.NumLit))
    )
    # Variable reference: $name.
    var = Suppress("$") + identifier
    var.setParseAction(
        lambda t: symbols.append((t[0], TokenType.Var))
    )
    literal = var | string | integer
    # Function name, with an optional leading dot suppressed.
    fnid = Suppress(Optional(".")) + identifier
    fnid.setParseAction(
        lambda t: symbols.append((t[0], TokenType.Call))
    )
    # Calls may be parenthesised or bare, and nest recursively.
    call = Forward()
    callb = fnid + ZeroOrMore(call | literal)
    call << ((Suppress("(") + callb + Suppress(")")) | callb)
    # Definition: let name $arg... = call
    fndef_head = Suppress("let") + identifier
    fndef_head.setParseAction(
        lambda t: symbols.append((t[0], TokenType.Def))
    )
    definition = fndef_head + ZeroOrMore(var) + Suppress("=") + call
    # Commands are ';'-terminated; the terminator is recorded too.
    cmd = OneOrMore((definition | call) + Word(";").setParseAction(
        lambda t: symbols.append((t[0], TokenType.End))
    ))
    msg = OneOrMore(cmd)
    return msg
def _parse_data(data: str) -> List[_PackageData]:
    """Parse an elisp-style package archive listing into _PackageData.

    The input is an s-expression: a version number followed by package
    entries of the form
    ``(name . [(version...) deps "description" kind (keyvals...)])``
    (presumably an ELPA/MELPA archive-contents file — confirm).
    """
    lpar, rpar, lbrk, rbrk, dot = map(Suppress, '()[].')
    nil = Suppress('nil')
    pkgname = Word(printables)
    decimal = Regex(r'0|-?[1-9]\d*').setParseAction(lambda t: int(t[0]))
    qstring = QuotedString(quoteChar='"', escChar='\\')
    # A version list "(1 2 3)" is joined into the string "1.2.3".
    version = (lpar + OneOrMore(decimal) + rpar).setParseAction(
        lambda s, l, t: ['.'.join(map(str, t))])
    dependency_entry = lpar + pkgname + version + rpar
    dependency_list = ((lpar + OneOrMore(dependency_entry) + rpar) | nil)
    # Maintainer/author entries may mix strings, dots and nils.
    people_list = OneOrMore(qstring | dot | nil)
    # Each keyval alternative normalises into a single (key, value) pair.
    keyval_url = (lpar + (Suppress(':url') | Suppress(':homepage')) + dot +
                  qstring + rpar).setParseAction(
        lambda s, l, t: [('url', t[0])])
    keyval_keywords = (lpar + Suppress(':keywords') + ZeroOrMore(qstring) +
                       rpar).setParseAction(
        lambda s, l, t: [('keywords', [str(k) for k in t])])
    keyval_commit = (lpar + Suppress(':commit') + dot + qstring +
                     rpar).setParseAction(
        lambda s, l, t: [('commit', t[0])])
    keyval_maintainer = (
        lpar + Suppress(':maintainer') + people_list + rpar
    ).setParseAction(lambda s, l, t: [('maintainer', [str(m) for m in t])])
    keyval_author = (
        lpar + Suppress(':author') + people_list +
        rpar).setParseAction(lambda s, l, t: [('author', [str(a) for a in t])])
    keyval_authors = (lpar + Suppress(':authors') +
                      OneOrMore(lpar + people_list + rpar) +
                      rpar).setParseAction(
        lambda s, l, t: [('authors', [str(a) for a in t])])
    # Order matters: ':authors' must be tried before ':author'.
    keyval_item = (keyval_url | keyval_keywords | keyval_commit |
                   keyval_maintainer | keyval_authors | keyval_author)
    # Keyvals collapse into a single dict; a bare nil becomes {}.
    keyvals = (lpar + ZeroOrMore(keyval_item) + rpar
               ).setParseAction(lambda s, l, t: [{k: v for k, v in t}]
                                ) | nil.setParseAction(lambda s, l, t: [{}])
    # name . [version deps "descr" kind keyvals]; deps and kind dropped.
    package_entry = (lpar + pkgname + dot + lbrk + version +
                     Suppress(dependency_list) + qstring +
                     Suppress(Word(alphas)) + keyvals + rbrk +
                     rpar).setParseAction(
        lambda s, l, t: [_PackageData(*t)])
    # Leading decimal is the archive format version; it is discarded.
    root = lpar + Suppress(decimal) + ZeroOrMore(package_entry) + rpar

    return root.parseString(data, parseAll=True)  # type: ignore
message_field = ( (Keyword("required") | Keyword("optional") | Keyword("repeated")) + field_type + field_name + Suppress("=") + field_number + Suppress(";") ) message_field.setParseAction(Field) message_name = Regex("[A-Za-z_]+") message_spec = ( Suppress(Keyword("message")) + message_name + Suppress("{") + Group(ZeroOrMore(message_field)) + Suppress("}") ) message_spec.setParseAction(Message) option_spec = Suppress(Keyword("option")) + Regex("[a-z_]+") + Suppress("=") + Regex('"[^"]*"') + Suppress(";") option_spec.setParseAction(lambda tokens: (tokens[0], tokens[1][1:-1])) option_list = ZeroOrMore(option_spec) option_list.setParseAction(lambda tokens: dict(tokens.asList())) message_list = Group(ZeroOrMore(message_spec)) proto_file = (option_list + message_list).ignore(comment) if len(sys.argv) < 3: print "usage: simpleproto some_file.proto outputfolder" print "The output will be placed in a folder within outputfolder" print "appropriate for the package specified in the file's java_package" print "option, in the file specified by java_outer_classname." options, messages = proto_file.parseFile(sys.argv[1], parseAll=True).asList() output_class = options["java_outer_classname"] output_package = options["java_package"] output_file_path = os.path.join(sys.argv[2], output_package.replace(".", os.path.sep), output_class + ".java")
# Grammar for "datatype: name [attributes]" parameters, quoted variables
# and recursively nested "section: ... endsection:" blocks.  Parse
# actions convert matches into Parameter/Variable/Section objects.
DATATYPE = oneOf(PARAMETER_CLASSES.keys())
PARAMETER = DATATYPE('datatype') + Suppress(':') + NAME('name') + \
    Suppress('[') + ATTRIBUTES_LIST('properties') + Suppress(']')


def _get_parameter(token):
    """ return Parameter object from tokens """
    return get_parameter(token['name'], token['datatype'],
                         token['properties'])


PARAMETER.setParseAction(_get_parameter)
PARAMETERS_LIST = Group(ZeroOrMore(PARAMETER)).setResultsName('parameters')

# variables
VARIABLE = Suppress('variable: ') + Word(alphanums)('name') + quotedString(
    'value').addParseAction(removeQuotes)


def _get_variable(token):
    """ return Variable object from tokens """
    return Variable(token['name'], token['value'])


VARIABLE.setParseAction(_get_variable)

# Sections nest recursively, hence the Forward declarations.
SECTION_CHILDREN_LIST = Forward()
SECTIONS_LIST = Forward()
# The section name after 'endsection:' is matched but discarded.
SECTION = Suppress('section:') + NAME('name') + Suppress('[') + \
    ATTRIBUTES_LIST('properties') + Suppress(']') + \
    SECTION_CHILDREN_LIST('children') + Suppress('endsection:') + \
    Suppress(NAME)


def _get_section(token):
    """ return Section object from tokens """
    return Section(token['name'], properties=token['properties'],
                   children=token['children'])


SECTION.setParseAction(_get_section)
SECTIONS_LIST << Group(ZeroOrMore(SECTION))
string.ascii_letters + string.digits + ';-', ) attr.leaveWhitespace() attr.setName('attr') hexdigits = Word(string.hexdigits, exact=2) hexdigits.setName('hexdigits') escaped = Suppress(Literal('\\')) + hexdigits escaped.setName('escaped') def _p_escaped(s, l, t): text = t[0] return chr(int(text, 16)) escaped.setParseAction(_p_escaped) value = Combine(OneOrMore(CharsNotIn('*()\\\0') | escaped)) value.setName('value') equal = Literal("=") equal.setParseAction(lambda s, l, t: pureldap.LDAPFilter_equalityMatch) approx = Literal("~=") approx.setParseAction(lambda s, l, t: pureldap.LDAPFilter_approxMatch) greater = Literal(">=") greater.setParseAction(lambda s, l, t: pureldap.LDAPFilter_greaterOrEqual) less = Literal("<=") less.setParseAction(lambda s, l, t: pureldap.LDAPFilter_lessOrEqual) filtertype = equal | approx | greater | less filtertype.setName('filtertype') simple = attr + filtertype + value simple.leaveWhitespace() simple.setName('simple')
def __init__(self, processor, baseiri, strict=False):
    """ See class docstring. """
    # pylint: disable=R0914,R0915
    self.reset(processor, baseiri, strict)

    # --- Turtle-level terms -------------------------------------------
    PrefixedName = PNAME_LN | PNAME_NS
    Iri = IRIREF | PrefixedName
    BNode = BLANK_NODE_LABEL | ANON
    RDFLiteral = STRING + Optional(LANGTAG("langtag") |
                                   Group(Suppress("^^") + Iri)("datatype"))
    Object = Forward()
    Collection = Suppress("(") + ZeroOrMore(Object) + Suppress(")")
    PredicateObjectList = Forward()
    BlankNodePropertyList = Suppress("[") + PredicateObjectList + \
        Suppress("]")
    TtlLiteral = RDFLiteral | NUMERIC_LITERAL | BOOLEAN_LITERAL
    Subject = Iri | BNode | Collection | VARIABLE  # added for LD Patch
    Predicate = Iri
    Object << (  # pylint: disable=W0104
        Iri | BNode | Collection | BlankNodePropertyList | TtlLiteral |
        VARIABLE  # added for LD Patch
    )
    Verb = Predicate | Keyword("a")
    ObjectList = Group(Object + ZeroOrMore(COMMA + Object))
    PredicateObjectList << (  # pylint: disable=W0106
        Verb + ObjectList + ZeroOrMore(SEMICOLON +
                                       Optional(Verb + ObjectList))
    )
    Triples = (Subject + PredicateObjectList) | \
        (BlankNodePropertyList + Optional(PredicateObjectList))

    Value = Iri | TtlLiteral | VARIABLE

    # --- LD Patch path expressions ------------------------------------
    InvPredicate = Suppress("^") + Predicate
    Step = Suppress("/") + (Predicate | InvPredicate | INDEX)
    Filter = Forward()
    Constraint = Filter | UNICITY_CONSTRAINT
    Path = Group(OneOrMore(Step | Constraint))
    Filter << (
        Suppress("[")  # pylint: disable=W0106
        + Group(ZeroOrMore(Step | Constraint))("path")  # = Path (*)
        + Optional(Suppress("=") + Object)("value")
        + Suppress("]")
    )
    # (*) we can not reuse the Path rule defined above,
    # because we want to set a name for that component

    Turtle = Triples + ZeroOrMore(PERIOD + Triples) + Optional(PERIOD)
    Graph = Suppress("{") + Optional(Turtle) + Suppress("}")

    # --- LD Patch statements ------------------------------------------
    Prefix = Literal("@prefix") + PNAME_NS + IRIREF + PERIOD
    if not strict:
        # Lenient mode also accepts SPARQL-style PREFIX (no '@', no '.').
        SparqlPrefix = CaselessKeyword("prefix") + PNAME_NS + IRIREF
        Prefix = Prefix | SparqlPrefix
    Bind = BIND_CMD + VARIABLE + Value + Optional(Path) + PERIOD
    Add = ADD_CMD + Graph + PERIOD
    AddNew = ADDNEW_CMD + Graph + PERIOD
    Delete = DELETE_CMD + Graph + PERIOD
    DeleteExisting = DELETEEXISTING_CMD + Graph + PERIOD
    Cut = CUT_CMD + VARIABLE + PERIOD
    UpdateList = UPDATELIST_CMD + Subject + Predicate + SLICE + \
        Collection + PERIOD

    Statement = Prefix | Bind | Add | AddNew | Delete | DeleteExisting | \
        Cut | UpdateList
    Patch = ZeroOrMore(Statement)
    if not strict:
        Patch.ignore("#" + restOfLine)  # Comment
    Patch.parseWithTabs()

    self.grammar = Patch

    # Parse actions are attached after the grammar is assembled; most
    # delegate to self._parse_* (tree building) or self._do_* (commands).
    IRIREF.setParseAction(self._parse_iri)
    PrefixedName.setParseAction(self._parse_pname)
    RDFLiteral.setParseAction(self._parse_turtleliteral)
    Collection.setParseAction(self._parse_collection)
    BlankNodePropertyList.setParseAction(self._parse_bnpl)
    Verb.setParseAction(self._parse_verb)
    ObjectList.setParseAction(self._parse_as_list)
    Triples.setParseAction(self._parse_tss)
    InvPredicate.setParseAction(self._parse_invpredicate)
    Filter.setParseAction(self._parse_filter)
    Path.setParseAction(self._parse_as_list)
    Prefix.setParseAction(self._do_prefix)
    Bind.setParseAction(self._do_bind)
    Add.setParseAction(self._do_add)
    AddNew.setParseAction(self._do_add_new)
    Delete.setParseAction(self._do_delete)
    DeleteExisting.setParseAction(self._do_delete_existing)
    Cut.setParseAction(self._do_cut)
    UpdateList.setParseAction(self._do_updatelist)
class ControlParser(BaseParser):
    """A parser for BEL control statements.

    .. seealso:: BEL 1.0 specification on `control records
    <http://openbel.org/language/version_1.0/bel_specification_version_1.0.html#_control_records>`_
    """

    def __init__(
        self,
        annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
        annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
        citation_clearing: bool = True,
        required_annotations: Optional[List[str]] = None,
    ) -> None:
        """Initialize the control statement parser.

        :param annotation_to_term: A dictionary of {annotation: set of valid values} defined with URL for parsing
        :param annotation_to_pattern: A dictionary of {annotation: regular expression string}
        :param annotation_to_local: A dictionary of {annotation: set of valid values} for parsing defined with LIST
        :param citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
        :param required_annotations: Annotations that are required
        """
        self.citation_clearing = citation_clearing

        # Validation lookups (empty dicts mean "debug mode", see _in_debug_mode).
        self.annotation_to_term = annotation_to_term or {}
        self.annotation_to_pattern = annotation_to_pattern or {}
        self.annotation_to_local = annotation_to_local or {}

        # Mutable parser state, updated by the handle_* parse actions below.
        self.statement_group = None
        self.citation_db = None
        self.citation_db_id = None
        self.evidence = None
        self.annotations = {}
        self.required_annotations = required_annotations or []

        # Validates the key (citation present, annotation defined) before any
        # SET/UNSET handler runs.
        annotation_key = ppc.identifier('key').setParseAction(
            self.handle_annotation_key)

        self.set_statement_group = set_statement_group_stub().setParseAction(
            self.handle_set_statement_group)
        self.set_citation = set_citation_stub.setParseAction(
            self.handle_set_citation)
        self.set_evidence = set_evidence_stub.setParseAction(
            self.handle_set_evidence)

        set_command_prefix = And([annotation_key('key'), Suppress('=')])
        self.set_command = set_command_prefix + qid('value')
        self.set_command.setParseAction(self.handle_set_command)

        self.set_command_list = set_command_prefix + delimited_quoted_list(
            'values')
        self.set_command_list.setParseAction(self.handle_set_command_list)

        # addParseAction (not set) so handle_annotation_key attached above
        # still runs first.
        self.unset_command = annotation_key('key')
        self.unset_command.addParseAction(self.handle_unset_command)

        self.unset_evidence = supporting_text_tags(EVIDENCE)
        self.unset_evidence.setParseAction(self.handle_unset_evidence)

        self.unset_citation = Suppress(BEL_KEYWORD_CITATION)
        self.unset_citation.setParseAction(self.handle_unset_citation)

        self.unset_statement_group = Suppress(BEL_KEYWORD_STATEMENT_GROUP)
        self.unset_statement_group.setParseAction(
            self.handle_unset_statement_group)

        self.unset_list = delimited_unquoted_list('values')
        self.unset_list.setParseAction(self.handle_unset_list)

        self.unset_all = unset_all.setParseAction(self.handle_unset_all)

        # MatchFirst order matters: specific keywords are tried before the
        # generic annotation-key commands.
        self.set_statements = set_tag + MatchFirst([
            self.set_statement_group,
            self.set_citation,
            self.set_evidence,
            self.set_command,
            self.set_command_list,
        ])
        self.unset_statements = unset_tag + MatchFirst([
            self.unset_all,
            self.unset_citation,
            self.unset_evidence,
            self.unset_statement_group,
            self.unset_command,
            self.unset_list,
        ])
        self.language = self.set_statements | self.unset_statements

        super(ControlParser, self).__init__(self.language)

    @property
    def _in_debug_mode(self) -> bool:
        # No vocabularies loaded at all -> skip annotation validation entirely.
        return not self.annotation_to_term and not self.annotation_to_pattern

    @property
    def citation_is_set(self) -> bool:
        """Check if the citation is set."""
        return self.citation_db is not None and self.citation_db_id is not None

    def has_enumerated_annotation(self, annotation: str) -> bool:
        """Check if the annotation is defined as an enumeration."""
        return annotation in self.annotation_to_term

    def has_regex_annotation(self, annotation: str) -> bool:
        """Check if the annotation is defined as a regular expression."""
        return annotation in self.annotation_to_pattern

    def has_local_annotation(self, annotation: str) -> bool:
        """Check if the annotation is defined locally."""
        return annotation in self.annotation_to_local

    def has_annotation(self, annotation: str) -> bool:
        """Check if the annotation is defined."""
        return (self.has_enumerated_annotation(annotation)
                or self.has_regex_annotation(annotation)
                or self.has_local_annotation(annotation))

    def raise_for_undefined_annotation(self, line: str, position: int,
                                       annotation: str) -> None:
        """Raise an exception if the annotation is not defined.

        :raises: UndefinedAnnotationWarning
        """
        if self._in_debug_mode:
            return

        if not self.has_annotation(annotation):
            raise UndefinedAnnotationWarning(self.get_line_number(), line,
                                             position, annotation)

    def raise_for_invalid_annotation_value(self, line: str, position: int,
                                           key: str, value: str) -> None:
        """Raise an exception if the annotation is not defined.

        :raises: IllegalAnnotationValueWarning or MissingAnnotationRegexWarning
        """
        if self._in_debug_mode:
            return

        if self.has_enumerated_annotation(
                key) and value not in self.annotation_to_term[key]:
            raise IllegalAnnotationValueWarning(self.get_line_number(), line,
                                                position, key, value)
        elif self.has_regex_annotation(
                key) and not self.annotation_to_pattern[key].match(value):
            raise MissingAnnotationRegexWarning(self.get_line_number(), line,
                                                position, key, value)
        elif self.has_local_annotation(
                key
        ) and value not in self.annotation_to_local[key]:  # TODO condense
            raise IllegalAnnotationValueWarning(self.get_line_number(), line,
                                                position, key, value)

    def raise_for_missing_citation(self, line: str, position: int) -> None:
        """Raise an exception if there is no citation present in the parser.

        :raises: MissingCitationException
        """
        if self.citation_clearing and not self.citation_is_set:
            raise MissingCitationException(self.get_line_number(), line,
                                           position)

    def handle_annotation_key(self, line: str, position: int,
                              tokens: ParseResults) -> ParseResults:
        """Handle an annotation key before parsing to validate that it's either enumerated or as a regex.

        :raise: MissingCitationException or UndefinedAnnotationWarning
        """
        key = tokens['key']
        self.raise_for_missing_citation(line, position)
        self.raise_for_undefined_annotation(line, position, key)
        return tokens

    def handle_set_statement_group(self, _, __,
                                   tokens: ParseResults) -> ParseResults:
        """Handle a ``SET STATEMENT_GROUP = "X"`` statement."""
        self.statement_group = tokens['group']
        return tokens

    def handle_set_citation(self, line: str, position: int,
                            tokens: ParseResults) -> ParseResults:
        """Handle a ``SET Citation = {"X", "Y", "Z", ...}`` statement."""
        self.clear_citation()

        values = tokens['values']

        if len(values) < 2:
            raise CitationTooShortException(self.get_line_number(), line,
                                            position)

        citation_db = values[0]

        if citation_db not in CITATION_TYPES:
            raise InvalidCitationType(self.get_line_number(), line, position,
                                      citation_db)

        if 2 == len(values):
            citation_db_id = values[1]
        elif 6 < len(values):
            raise CitationTooLongException(self.get_line_number(), line,
                                           position)
        else:
            # 3-6 values: the identifier is in position 2; the journal name
            # (and any trailing fields) are discarded.
            if 3 == len(values):
                logger.warning('Throwing away JOURNAL entry in position 2')
            else:
                logger.warning(
                    'Throwing away JOURNAL entry in position 2 and everything after position 3'
                )

            citation_db_id = values[2]

        if citation_db == CITATION_TYPE_PUBMED and not is_int(citation_db_id):
            raise InvalidPubMedIdentifierWarning(self.get_line_number(), line,
                                                 position, citation_db_id)

        self.citation_db = citation_db
        self.citation_db_id = citation_db_id
        return tokens

    def handle_set_evidence(self, _, __,
                            tokens: ParseResults) -> ParseResults:
        """Handle a ``SET Evidence = ""`` statement."""
        self.evidence = tokens['value']
        return tokens

    def handle_set_command(self, line: str, position: int,
                           tokens: ParseResults) -> ParseResults:
        """Handle a ``SET X = "Y"`` statement."""
        key, value = tokens['key'], tokens['value']
        self.raise_for_invalid_annotation_value(line, position, key, value)
        self.annotations[key] = value
        return tokens

    def handle_set_command_list(self, line: str, position: int,
                                tokens: ParseResults) -> ParseResults:
        """Handle a ``SET X = {"Y", "Z", ...}`` statement."""
        key, values = tokens['key'], tokens['values']
        for value in values:
            self.raise_for_invalid_annotation_value(line, position, key,
                                                    value)
        self.annotations[key] = set(values)
        return tokens

    def handle_unset_statement_group(self, line: str, position: int,
                                     tokens: ParseResults) -> ParseResults:
        """Unset the statement group, or raises an exception if it is not set.

        :raises: MissingAnnotationKeyWarning
        """
        if self.statement_group is None:
            raise MissingAnnotationKeyWarning(self.get_line_number(), line,
                                              position,
                                              BEL_KEYWORD_STATEMENT_GROUP)
        self.statement_group = None
        return tokens

    def handle_unset_citation(self, line: str, position: int,
                              tokens: ParseResults) -> ParseResults:
        """Unset the citation, or raise an exception if it is not set.

        :raises: MissingAnnotationKeyWarning
        """
        if not self.citation_is_set:
            raise MissingAnnotationKeyWarning(self.get_line_number(), line,
                                              position, BEL_KEYWORD_CITATION)

        self.clear_citation()

        return tokens

    def handle_unset_evidence(self, line: str, position: int,
                              tokens: ParseResults) -> ParseResults:
        """Unset the evidence, or throws an exception if it is not already set.

        The value for ``tokens[EVIDENCE]`` corresponds to which alternate of SupportingText or Evidence was used in
        the BEL script.

        :raises: MissingAnnotationKeyWarning
        """
        if self.evidence is None:
            raise MissingAnnotationKeyWarning(self.get_line_number(), line,
                                              position, tokens[EVIDENCE])
        self.evidence = None
        return tokens

    def validate_unset_command(self, line: str, position: int,
                               annotation: str) -> None:
        """Raise an exception when trying to ``UNSET X`` if ``X`` is not already set.

        :raises: MissingAnnotationKeyWarning
        """
        if annotation not in self.annotations:
            raise MissingAnnotationKeyWarning(self.get_line_number(), line,
                                              position, annotation)

    def handle_unset_command(self, line: str, position: int,
                             tokens: ParseResults) -> ParseResults:
        """Handle an ``UNSET X`` statement or raises an exception if it is not already set.

        :raises: MissingAnnotationKeyWarning
        """
        key = tokens['key']
        self.validate_unset_command(line, position, key)
        del self.annotations[key]
        return tokens

    def handle_unset_list(self, line: str, position: int,
                          tokens: ParseResults) -> ParseResults:
        """Handle ``UNSET {A, B, ...}`` or raises an exception of any of them are not present.

        Consider that all unsets are in peril if just one of them is wrong!

        :raises: MissingAnnotationKeyWarning
        """
        for key in tokens['values']:
            if key in {BEL_KEYWORD_EVIDENCE, BEL_KEYWORD_SUPPORT}:
                self.evidence = None
            else:
                self.validate_unset_command(line, position, key)
                del self.annotations[key]

        return tokens

    def handle_unset_all(self, _, __, tokens) -> ParseResults:
        """Handle an ``UNSET_ALL`` statement."""
        self.clear()
        return tokens

    def get_annotations(self) -> Dict:
        """Get the current annotations."""
        return {
            EVIDENCE: self.evidence,
            CITATION: self.get_citation(),
            ANNOTATIONS: self.annotations.copy(),
        }

    def get_citation(self) -> Mapping[str, str]:
        """Get the citation dictionary."""
        return citation_dict(db=self.citation_db, db_id=self.citation_db_id)

    def get_missing_required_annotations(self) -> List[str]:
        """Return missing required annotations."""
        return [
            required_annotation
            for required_annotation in self.required_annotations
            if required_annotation not in self.annotations
        ]

    def clear_citation(self) -> None:
        """Clear the citation and if citation clearing is enabled, clear the evidence and annotations."""
        self.citation_db = None
        self.citation_db_id = None

        if self.citation_clearing:
            self.evidence = None
            self.annotations.clear()

    def clear(self) -> None:
        """Clear the statement_group, citation, evidence, and annotations."""
        self.statement_group = None
        self.citation_db = None
        self.citation_db_id = None
        self.evidence = None
        self.annotations.clear()
# SPARQL 1.1 numeric-literal terminals (grammar productions [148]-[153]).
# Matched text is converted to a typed rdflib Literal by the parse actions.

DECIMAL = Regex(r"[0-9]*\.[0-9]+")  # (?![eE])
# DECIMAL.setResultsName('decimal')


def _to_decimal_literal(toks):
    # Wrap the matched text as an xsd:decimal literal.
    return rdflib.Literal(toks[0], datatype=rdflib.XSD.decimal)


DECIMAL.setParseAction(_to_decimal_literal)

# [148] DOUBLE ::= [0-9]+ '.' [0-9]* EXPONENT | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT
DOUBLE = Regex(
    r"[0-9]+\.[0-9]*" + EXPONENT_re
    + r"|\.([0-9])+" + EXPONENT_re
    + r"|[0-9]+" + EXPONENT_re)
# DOUBLE.setResultsName('double')


def _to_double_literal(toks):
    # Wrap the matched text as an xsd:double literal.
    return rdflib.Literal(toks[0], datatype=rdflib.XSD.double)


DOUBLE.setParseAction(_to_double_literal)

# [149] INTEGER_POSITIVE ::= '+' INTEGER
INTEGER_POSITIVE = Suppress("+") + INTEGER.copy().leaveWhitespace()


def _to_positive_integer_literal(toks):
    # Re-attach the suppressed sign so the literal keeps its leading '+'.
    return rdflib.Literal("+" + toks[0], datatype=rdflib.XSD.integer)


INTEGER_POSITIVE.setParseAction(_to_positive_integer_literal)

# [150] DECIMAL_POSITIVE ::= '+' DECIMAL
# NOTE(review): unlike INTEGER_POSITIVE, no parse action re-attaches the
# '+' sign here or for DOUBLE_POSITIVE — confirm this is intentional.
DECIMAL_POSITIVE = Suppress("+") + DECIMAL.copy().leaveWhitespace()

# [151] DOUBLE_POSITIVE ::= '+' DOUBLE
DOUBLE_POSITIVE = Suppress("+") + DOUBLE.copy().leaveWhitespace()

# [152] INTEGER_NEGATIVE ::= '-' INTEGER
INTEGER_NEGATIVE = Suppress("-") + INTEGER.copy().leaveWhitespace()
INTEGER_NEGATIVE.setParseAction(lambda toks: neg(toks[0]))

# [153] DECIMAL_NEGATIVE ::= '-' DECIMAL
DECIMAL_NEGATIVE = Suppress("-") + DECIMAL.copy().leaveWhitespace()
DECIMAL_NEGATIVE.setParseAction(lambda toks: neg(toks[0]))
def _build_asn1_grammar():
    """Build and return the pyparsing grammar for ASN.1 module definitions.

    Returns a parser matching one or more ``ModuleDefinition``s; parse
    actions wrap recognized productions in ``AnnotatedToken`` tags so a
    later pass can walk the AST by token name.
    """
    def build_identifier(prefix_pattern):
        # ASN.1 identifiers: first char constrained by prefix_pattern
        # (lowercase for value refs, uppercase for type refs).
        identifier_suffix = Optional(Word(srange('[-0-9a-zA-Z]')))
        identifier = Combine(Word(srange(prefix_pattern), exact=1) + identifier_suffix)  # todo: more rigorous? trailing hyphens and -- forbidden
        return identifier

    def braced_list(element_rule):
        # '{ a, b, c }' -> grouped list of elements.
        return Suppress('{') + Group(delimitedList(element_rule)) + Suppress('}')

    def annotate(name):
        # Parse-action factory: tag the matched tokens with `name`.
        def annotation(t):
            return AnnotatedToken(name, t.asList())
        return annotation

    # Reserved words
    DEFINITIONS = Keyword('DEFINITIONS')
    BEGIN = Keyword('BEGIN')
    END = Keyword('END')
    OPTIONAL = Keyword('OPTIONAL')
    DEFAULT = Keyword('DEFAULT')
    TRUE = Keyword('TRUE')
    FALSE = Keyword('FALSE')
    UNIVERSAL = Keyword('UNIVERSAL')
    APPLICATION = Keyword('APPLICATION')
    PRIVATE = Keyword('PRIVATE')
    MIN = Keyword('MIN')
    MAX = Keyword('MAX')
    IMPLICIT = Keyword('IMPLICIT')
    EXPLICIT = Keyword('EXPLICIT')
    EXPLICIT_TAGS = Keyword('EXPLICIT TAGS')
    IMPLICIT_TAGS = Keyword('IMPLICIT TAGS')
    AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS')
    EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')
    COMPONENTS_OF = Keyword('COMPONENTS OF')
    ELLIPSIS = Keyword('...')
    SIZE = Keyword('SIZE')
    OF = Keyword('OF')
    IMPORTS = Keyword('IMPORTS')
    EXPORTS = Keyword('EXPORTS')
    FROM = Keyword('FROM')

    # Built-in types
    SEQUENCE = Keyword('SEQUENCE')
    SET = Keyword('SET')
    CHOICE = Keyword('CHOICE')
    ENUMERATED = Keyword('ENUMERATED')
    BIT_STRING = Keyword('BIT STRING')
    BOOLEAN = Keyword('BOOLEAN')
    REAL = Keyword('REAL')
    OCTET_STRING = Keyword('OCTET STRING')
    CHARACTER_STRING = Keyword('CHARACTER STRING')
    NULL = Keyword('NULL')
    INTEGER = Keyword('INTEGER')
    OBJECT_IDENTIFIER = Keyword('OBJECT IDENTIFIER')

    # Restricted string types
    BMPString = Keyword('BMPString')
    GeneralString = Keyword('GeneralString')
    GraphicString = Keyword('GraphicString')
    IA5String = Keyword('IA5String')
    ISO646String = Keyword('ISO646String')
    NumericString = Keyword('NumericString')
    PrintableString = Keyword('PrintableString')
    TeletexString = Keyword('TeletexString')
    T61String = Keyword('T61String')
    UniversalString = Keyword('UniversalString')
    UTF8String = Keyword('UTF8String')
    VideotexString = Keyword('VideotexString')
    VisibleString = Keyword('VisibleString')

    # Useful types
    GeneralizedTime = Keyword('GeneralizedTime')
    UTCTime = Keyword('UTCTime')
    ObjectDescriptor = Keyword('ObjectDescriptor')

    # Literals
    number = Word(nums)
    signed_number = Combine(Optional('-') + number)  # todo: consider defined values from 18.1
    bstring = Suppress('\'') + StringOf('01') + Suppress('\'B')
    hstring = Suppress('\'') + StringOf('0123456789ABCDEF') + Suppress('\'H')

    # Comments: ASN.1 '--' comments (terminated by '--' or end of line)
    # plus C-style block comments.
    hyphen_comment = Regex(r"--[\s\S]*?(--|$)", flags=re.MULTILINE)
    comment = hyphen_comment | cStyleComment

    # identifier
    identifier = build_identifier('[a-z]')

    # references
    # these are duplicated to force unique token annotations
    valuereference = build_identifier('[a-z]')
    typereference = build_identifier('[A-Z]')
    module_reference = build_identifier('[A-Z]')
    reference = valuereference | typereference  # TODO: consider object references from 12.1

    # values
    # BUG: These are badly specified and cause the grammar to break if used generally.
    # todo: consider more literals from 16.9
    real_value = Regex(r'-?\d+(\.\d*)?')  # todo: this doesn't really follow the spec
    boolean_value = TRUE | FALSE
    bitstring_value = bstring | hstring  # todo: consider more forms from 21.9
    integer_value = signed_number
    null_value = NULL
    cstring_value = dblQuotedString

    builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value | cstring_value
    defined_value = valuereference  # todo: more options from 13.1

    # object identifier value
    name_form = Unique(identifier)
    number_form = Unique(number)
    name_and_number_form = name_form + Suppress('(') + number_form + Suppress(')')
    objid_components = name_and_number_form | name_form | number_form | defined_value
    objid_components_list = OneOrMore(objid_components)
    object_identifier_value = Suppress('{') + \
        (objid_components_list | (defined_value + objid_components_list)) + \
        Suppress('}')

    value = builtin_value | defined_value | object_identifier_value

    # definitive identifier value
    definitive_number_form = Unique(number)
    definitive_name_and_number_form = name_form + Suppress('(') + definitive_number_form + Suppress(')')
    definitive_objid_component = definitive_name_and_number_form | name_form | definitive_number_form
    definitive_objid_component_list = OneOrMore(definitive_objid_component)
    definitive_identifier = Optional(Suppress('{') + definitive_objid_component_list + Suppress('}'))

    # tags
    class_ = UNIVERSAL | APPLICATION | PRIVATE
    class_number = Unique(number)  # todo: consider defined values from 30.1
    tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
    tag_default = EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS | empty

    # extensions
    extension_default = EXTENSIBILITY_IMPLIED | empty

    # types
    defined_type = Unique(typereference)  # todo: consider other defined types from 13.1
    referenced_type = Unique(defined_type)  # todo: consider other ref:d types from 16.3

    # Forward-declare these, they can only be fully defined once
    # we have all types defined. There are some circular dependencies.
    named_type = Forward()
    type_ = Forward()

    # constraints
    # todo: consider the full subtype and general constraint syntax described in 45.*
    # but for now, just implement a simple integer value range.
    value_range_constraint = (signed_number | valuereference | MIN) + Suppress('..') + (signed_number | valuereference | MAX)
    size_constraint = Optional(Suppress('(')) + Suppress(SIZE) + Suppress('(') + (value_range_constraint | signed_number) + Suppress(')') + Optional(Suppress(')'))
    constraint = Suppress('(') + value_range_constraint + Suppress(')')  # TODO: consider exception syntax from 24.1

    extension_marker = Unique(ELLIPSIS)

    component_type_optional = named_type + Suppress(OPTIONAL)
    component_type_default = named_type + Suppress(DEFAULT) + value
    component_type_components_of = Suppress(COMPONENTS_OF) + type_
    component_type = component_type_components_of | component_type_optional | component_type_default | named_type

    tagged_type = tag + Optional(IMPLICIT | EXPLICIT) + type_

    named_number_value = Suppress('(') + signed_number + Suppress(')')
    named_number = identifier + named_number_value
    enumeration = named_number | identifier

    set_type = SET + braced_list(component_type | extension_marker)
    sequence_type = SEQUENCE + braced_list(component_type | extension_marker)
    sequenceof_type = Suppress(SEQUENCE) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type)
    setof_type = Suppress(SET) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type)
    choice_type = CHOICE + braced_list(named_type | extension_marker)
    enumerated_type = ENUMERATED + braced_list(enumeration | extension_marker)
    bitstring_type = BIT_STRING + Optional(braced_list(named_number))
    plain_integer_type = INTEGER
    restricted_integer_type = INTEGER + braced_list(named_number)
    boolean_type = BOOLEAN
    real_type = REAL
    null_type = NULL
    object_identifier_type = OBJECT_IDENTIFIER
    octetstring_type = OCTET_STRING + Optional(size_constraint)
    unrestricted_characterstring_type = CHARACTER_STRING
    restricted_characterstring_type = BMPString | GeneralString | \
        GraphicString | IA5String | \
        ISO646String | NumericString | \
        PrintableString | TeletexString | \
        T61String | UniversalString | \
        UTF8String | VideotexString | VisibleString
    characterstring_type = restricted_characterstring_type | unrestricted_characterstring_type
    useful_type = GeneralizedTime | UTCTime | ObjectDescriptor

    # todo: consider other builtins from 16.2
    # Alternation order is significant: value-list and tagged types must be
    # tried before the bare simple/constructed forms.
    simple_type = (boolean_type | null_type | octetstring_type | characterstring_type | real_type | plain_integer_type | object_identifier_type | useful_type) + Optional(constraint)
    constructed_type = choice_type | sequence_type | set_type
    value_list_type = restricted_integer_type | enumerated_type
    builtin_type = value_list_type | tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | bitstring_type

    type_ << (builtin_type | referenced_type)

    # EXT: identifier should not be Optional here, but
    # our other ASN.1 code generator supports unnamed members,
    # and we use them.
    named_type << (Optional(identifier) + type_)

    type_assignment = typereference + '::=' + type_
    value_assignment = valuereference + type_ + '::=' + value

    assignment = type_assignment | value_assignment
    assignment_list = ZeroOrMore(assignment)

    assigned_identifier = Optional(object_identifier_value | defined_value)
    global_module_reference = module_reference + assigned_identifier

    symbol = Unique(reference)  # TODO: parameterized reference?
    symbol_list = Group(delimitedList(symbol))
    symbols_from_module = symbol_list + Suppress(FROM) + global_module_reference
    symbols_from_module_list = OneOrMore(symbols_from_module)
    symbols_imported = Optional(symbols_from_module_list)
    exports = Optional(Suppress(EXPORTS) + symbol_list + Suppress(';'))
    imports = Optional(Suppress(IMPORTS) + symbols_imported + Suppress(';'))

    module_body = (exports + imports + assignment_list)
    module_defaults = Suppress(tag_default + extension_default)  # we don't want these in the AST
    module_identifier = module_reference + definitive_identifier
    module_definition = module_identifier + DEFINITIONS + module_defaults + '::=' + BEGIN + module_body + END

    module_definition.ignore(comment)

    # Mark up the parse results with token tags
    identifier.setParseAction(annotate('Identifier'))
    named_number_value.setParseAction(annotate('Value'))
    tag.setParseAction(annotate('Tag'))
    class_.setParseAction(annotate('TagClass'))
    class_number.setParseAction(annotate('TagClassNumber'))
    type_.setParseAction(annotate('Type'))
    simple_type.setParseAction(annotate('SimpleType'))
    choice_type.setParseAction(annotate('ChoiceType'))
    sequence_type.setParseAction(annotate('SequenceType'))
    set_type.setParseAction(annotate('SetType'))
    value_list_type.setParseAction(annotate('ValueListType'))
    bitstring_type.setParseAction(annotate('BitStringType'))
    referenced_type.setParseAction(annotate('ReferencedType'))
    sequenceof_type.setParseAction(annotate('SequenceOfType'))
    setof_type.setParseAction(annotate('SetOfType'))
    named_number.setParseAction(annotate('NamedValue'))
    constraint.setParseAction(annotate('Constraint'))
    size_constraint.setParseAction(annotate('SizeConstraint'))
    component_type.setParseAction(annotate('ComponentType'))
    component_type_optional.setParseAction(annotate('ComponentTypeOptional'))
    component_type_default.setParseAction(annotate('ComponentTypeDefault'))
    component_type_components_of.setParseAction(annotate('ComponentTypeComponentsOf'))
    tagged_type.setParseAction(annotate('TaggedType'))
    named_type.setParseAction(annotate('NamedType'))
    type_assignment.setParseAction(annotate('TypeAssignment'))
    value_assignment.setParseAction(annotate('ValueAssignment'))
    valuereference.setParseAction(annotate('ValueReference'))
    module_reference.setParseAction(annotate('ModuleReference'))
    module_body.setParseAction(annotate('ModuleBody'))
    module_definition.setParseAction(annotate('ModuleDefinition'))
    extension_marker.setParseAction(annotate('ExtensionMarker'))
    name_form.setParseAction(annotate('NameForm'))
    number_form.setParseAction(annotate('NumberForm'))
    name_and_number_form.setParseAction(annotate('NameAndNumberForm'))
    object_identifier_value.setParseAction(annotate('ObjectIdentifierValue'))
    definitive_identifier.setParseAction(annotate('DefinitiveIdentifier'))
    definitive_number_form.setParseAction(annotate('DefinitiveNumberForm'))
    definitive_name_and_number_form.setParseAction(annotate('DefinitiveNameAndNumberForm'))
    imports.setParseAction(annotate('Imports'))
    exports.setParseAction(annotate('Exports'))
    assignment_list.setParseAction(annotate('AssignmentList'))
    bstring.setParseAction(annotate('BinaryStringValue'))
    hstring.setParseAction(annotate('HexStringValue'))

    start = OneOrMore(module_definition)
    return start
        # NOTE(review): this chunk is truncated — the enclosing method that
        # this `return` belongs to begins outside the visible source.
        return [toks]
    self.setParseAction(listify)

# --- TeX command grammar (module level) ---
# NOTE(review): uses Python-2 syntax (`print s` below) — this fragment
# predates Python 3 and will not run unmodified on it.
ParserElement.setDefaultWhitespaceChars("\n\t")
backslash = chr(92)  # '\' without having to escape it in a literal
texcmd = Forward()
# Plain text runs: anything up to a backslash or math '$'
# (filler2 additionally stops at braces, for use inside {...} params).
filler = CharsNotIn(backslash + '$')
filler2 = CharsNotIn(backslash + '$' + '{}')

# Optional [...] argument of a TeX command.
arg = '[' + CharsNotIn("]") + ']'
arg.setParseAction(argfun)

# $...$ math is kept verbatim (unquoteResults=False).
dollarmath = QuotedString('$', multiline=True, unquoteResults=False)
# {...} parameter: may itself contain math, text, nested groups or commands.
param = Suppress(Literal('{')) + ZeroOrMoreAsList(dollarmath | filler2 | QuotedString('{', endQuoteChar='}', unquoteResults=False) | texcmd) + Suppress(Literal('}'))
param.setParseAction(paramfun)

def bs(c): return Literal("\\" + c)
# Single-character escape commands such as \{ \} \& \_ ...
singles = bs("[") | bs("]") | bs("{") | bs("}") | bs("\\") | bs("&") | bs("_") | bs(",") | bs("#") | bs("\n") | bs(";") | bs("|") | bs("%") | bs("*") | bs("~") | bs("^")

# A command is either a single-char escape or \name, followed by any
# number of [args] and {params}.
texcmd << (singles | Word("\\", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", min = 2)) + ZeroOrMoreAsList(arg) + ZeroOrMoreAsList(param)

def texcmdfun(s, loc, toks):
    return TexCmd(s, loc, toks)

texcmd.setParseAction(texcmdfun)

#legal = "".join([chr(x) for x in set(range(32, 127)) - set(backslash)])
#filler = Word(legal)

# A document is a mix of math, commands and plain text.
document = ZeroOrMore(dollarmath | texcmd | filler) + StringEnd().suppress()

if 0:
    s = "This is \\\\ test"
    print s
    # NOTE(review): the body of this loop is truncated in the source.
    for t in document.parseString(s):
# SPARQL 1.1 numeric-literal terminals (grammar productions [148]-[153]).
# Matched text is converted to a typed rdflib Literal by the parse actions.

DECIMAL = Regex(r"[0-9]*\.[0-9]+")  # (?![eE])
# DECIMAL.setResultsName('decimal')


def _to_decimal_literal(toks):
    # Wrap the matched text as an xsd:decimal literal.
    return rdflib.Literal(toks[0], datatype=rdflib.XSD.decimal)


DECIMAL.setParseAction(_to_decimal_literal)

# [148] DOUBLE ::= [0-9]+ '.' [0-9]* EXPONENT | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT
DOUBLE = Regex(
    r"[0-9]+\.[0-9]*" + EXPONENT_re
    + r"|\.([0-9])+" + EXPONENT_re
    + r"|[0-9]+" + EXPONENT_re)
# DOUBLE.setResultsName('double')


def _to_double_literal(toks):
    # Wrap the matched text as an xsd:double literal.
    return rdflib.Literal(toks[0], datatype=rdflib.XSD.double)


DOUBLE.setParseAction(_to_double_literal)

# [149] INTEGER_POSITIVE ::= '+' INTEGER
INTEGER_POSITIVE = Suppress("+") + INTEGER.copy().leaveWhitespace()


def _to_positive_integer_literal(toks):
    # Re-attach the suppressed sign so the literal keeps its leading '+'.
    return rdflib.Literal("+" + toks[0], datatype=rdflib.XSD.integer)


INTEGER_POSITIVE.setParseAction(_to_positive_integer_literal)

# [150] DECIMAL_POSITIVE ::= '+' DECIMAL
# NOTE(review): unlike INTEGER_POSITIVE, no parse action re-attaches the
# '+' sign here or for DOUBLE_POSITIVE — confirm this is intentional.
DECIMAL_POSITIVE = Suppress("+") + DECIMAL.copy().leaveWhitespace()

# [151] DOUBLE_POSITIVE ::= '+' DOUBLE
DOUBLE_POSITIVE = Suppress("+") + DOUBLE.copy().leaveWhitespace()

# [152] INTEGER_NEGATIVE ::= '-' INTEGER
INTEGER_NEGATIVE = Suppress("-") + INTEGER.copy().leaveWhitespace()
INTEGER_NEGATIVE.setParseAction(lambda toks: neg(toks[0]))

# [153] DECIMAL_NEGATIVE ::= '-' DECIMAL
DECIMAL_NEGATIVE = Suppress("-") + DECIMAL.copy().leaveWhitespace()
DECIMAL_NEGATIVE.setParseAction(lambda toks: neg(toks[0]))
data = single | tuple_ # should not match a single (tr) simple_data = Group(NotAny('(tr)') + data + ZeroOrMore(Optional(Suppress(',')) + data)) # the first element of a set data record cannot be 'dimen', or else # these would match set_def_stmts non_dimen_simple_data = ~Literal('dimen') + simple_data matrix_row = Group(single + OneOrMore(PLUS | MINUS)) matrix_data = ":" + OneOrMore(single).setResultsName('columns') \ + ":=" + OneOrMore(matrix_row).setResultsName('data') matrix_data.setParseAction(MatrixData) tr_matrix_data = Suppress("(tr)") + matrix_data tr_matrix_data.setParseAction(mark_transposed) set_slice_component = number | symbol | '*' set_slice_record = LPAREN + NotAny('tr') + delimitedList(set_slice_component) + RPAREN set_slice_record.setParseAction(SliceRecord) _set_record = set_slice_record | matrix_data | tr_matrix_data | Suppress(":=") set_record = simple_data | _set_record non_dimen_set_record = non_dimen_simple_data | _set_record set_def_stmt = "set" + symbol + Optional(subscript_domain) + \ Optional("dimen" + integer.setResultsName('dimen')) + END set_def_stmt.setParseAction(SetDefStmt) set_member = LBRACKET + delimitedList(data) + RBRACKET
# Attach evaluator constructors to the scalar operand terminals: each
# parse action wraps the matched tokens in its Eval* node.
variable_operand.setParseAction(EvalVariable)
explicit_variable_operand.setParseAction(EvalExplicitVariable)
integer_operand.setParseAction(EvalInteger)
real_operand.setParseAction(EvalReal)
string_operand.setParseAction(EvalString)
constant.setParseAction(EvalConstant)
regexp.setParseAction(EvalRegExp)
timespan.setParseAction(EvalTimespan)

# 'name:' prefix used as a modifier marker.
modifier = Regex(r"([a-zA-Z][a-zA-Z0-9_]*)\:")

# Bare comma-separated expressions (no brackets).
simple_list_operand = Group(delimitedList(expr))
simple_list_operand.setParseAction(EvalSimpleList)

# '[a, b, ...]' — note '[]' needs its own rule since delimitedList
# requires at least one element.
list_operand = Suppress("[") + delimitedList(expr) + Suppress("]")
list_operand.setParseAction(EvalList)
empty_list_operand = Literal("[]")
empty_list_operand.setParseAction(EvalEmptyList)

# '{k: v, ...}' — same empty-literal treatment for '{}'.
dict_item = Group(expr + Suppress(Literal(":")) + expr)
dict_operand = Group(Suppress("{") + delimitedList(dict_item) + Suppress("}"))
dict_operand.setParseAction(EvalDict)
empty_dict_operand = Literal("{}")
empty_dict_operand.setParseAction(EvalEmptyDict)

# 'key=value' pairs; WordEnd stops '=' from matching '==', '!=', '+=', etc.
key_pair = Group(Regex(r"([a-zA-Z0-9_]+)") + Suppress(Literal("=") + WordEnd("=!+-*/")) + expr)
key_pair_dict_operand = delimitedList(key_pair)
key_pair_dict_operand.setParseAction(EvalKeyPairDict)
# DOUBLE.setResultsName('double') DOUBLE.setParseAction(lambda x: rdflib.Literal(x[0], datatype=rdflib.XSD.double)) # [149] INTEGER_POSITIVE ::= '+' INTEGER INTEGER_POSITIVE = Suppress("+") + INTEGER.copy().leaveWhitespace() # [150] DECIMAL_POSITIVE ::= '+' DECIMAL DECIMAL_POSITIVE = Suppress("+") + DECIMAL.copy().leaveWhitespace() # [151] DOUBLE_POSITIVE ::= '+' DOUBLE DOUBLE_POSITIVE = Suppress("+") + DOUBLE.copy().leaveWhitespace() # [152] INTEGER_NEGATIVE ::= '-' INTEGER INTEGER_NEGATIVE = Suppress("-") + INTEGER.copy().leaveWhitespace() INTEGER_NEGATIVE.setParseAction(lambda x: neg(x[0])) # [153] DECIMAL_NEGATIVE ::= '-' DECIMAL DECIMAL_NEGATIVE = Suppress("-") + DECIMAL.copy().leaveWhitespace() DECIMAL_NEGATIVE.setParseAction(lambda x: neg(x[0])) # [154] DOUBLE_NEGATIVE ::= '-' DOUBLE DOUBLE_NEGATIVE = Suppress("-") + DOUBLE.copy().leaveWhitespace() DOUBLE_NEGATIVE.setParseAction(lambda x: neg(x[0])) # [160] ECHAR ::= '\' [tbnrf\"'] # ECHAR = Regex('\\\\[tbnrf"\']') # [158] STRING_LITERAL_LONG1 ::= "'''" ( ( "'" | "''" )? ( [^'\] | ECHAR ) )* "'''" # STRING_LITERAL_LONG1 = Literal("'''") + ( Optional( Literal("'") | "''" ) + ZeroOrMore( ~ Literal("'\\") | ECHAR ) ) + "'''"
# For for_header = ( for_kw + identifier + Suppress("=") + expression + to_kw + expression + pOptional(step_kw + expression)).setParseAction(lambda r: ForHeader(*r)) for_footer = (next_kw + pOptional(identifier)).setParseAction(lambda r: ForFooter(*r)) loop_statement = for_header | for_footer # If if_header = (if_kw + expression + pOptional(then_kw)).setParseAction(lambda r: IfHeader(*r)) elseif_header = (elseif_kw + expression + pOptional(then_kw)).setParseAction(lambda r: ElseIfHeader(*r)) else_header = else_kw.setParseAction(lambda r: ElseHeader()) if_footer = (end_kw + if_kw).setParseAction(lambda r: IfFooter()) if_oneliner = (if_kw + expression + then_kw + statement + pOptional(else_kw + statement)).setParseAction(lambda r: If( condition=r[0], body=[r[1]], else_block=Block([r[3]]) if len(r) >= 3 else None, )) conditional_statement = (if_oneliner | if_header | elseif_header | else_header | if_footer) #################### # Error handling # ####################
| Literal("resize") + Suppress("(") + _basic_expr + "," + _basic_expr + Suppress(")")) _word_function.setParseAction(lambda s, l, t: WordFunction(t[0], t[1], t[2])) _count = (Literal("count") + Suppress("(") + delimitedList(_basic_expr) + Suppress(")")) _count.setParseAction(lambda s, l, t: Count(t[1])) _next = Literal("next") + Suppress("(") + _basic_expr + Suppress(")") _next.setParseAction(lambda s, l, t: Next(t[1])) _case_case = _basic_expr + Suppress(":") + _basic_expr + Suppress(";") _case_body = OneOrMore(_case_case) _case_body.setParseAction(lambda s, l, t: OrderedDict(zip(t[::2], t[1::2]))) _case = Suppress("case") + _case_body + Suppress("esac") _case.setParseAction(lambda s, l, t: Case(t[0])) _base = (complex_identifier ^ (_conversion | _word_function | _count | _next | Suppress("(") + _basic_expr + Suppress(")") | _case | constant)) _ap = Forward() _array_subscript = Group(Suppress("[") + _basic_expr + Suppress("]")) _word_bit_selection = Group(Suppress("[") + _basic_expr + Suppress(":") + _basic_expr + Suppress("]"))
class Parser(object):
    """Lexical and syntax analysis for the crypto DSL.

    Builds a pyparsing grammar in ``__init__`` whose parse actions feed a
    ``Syntax_tree`` (``self._AST``) as a side effect of parsing.  Entry
    points: ``parse`` (production), ``parse_test_*`` (test helpers).

    NOTE(review): the grammar relies heavily on ``^`` (longest-match Or)
    versus ``|`` (first-match) and on parse-action side effects — the
    statement order below is load-bearing.
    """

    @property
    def semantic_analyser(self):
        # Delegates to the analyser owned by the syntax tree.
        return self._AST.semantic_analyser

    def __init__(self):
        self._AST = Syntax_tree()
        # keywords
        self.int_ = Keyword('Int')
        self.false_ = Keyword('False')
        self.true_ = Keyword('True')
        # 'Bit' may be prefixed with '@' (attribute/annotation form).
        self.bit_ = Combine(Optional(Literal("@")) + Keyword('Bit'))
        self.sbox_ = Keyword('Sbox')
        self.l_shift_ = Keyword('<<')
        self.r_shift_ = Keyword('>>')
        self.circ_l_shift_ = Keyword('<<<')
        self.circ_r_shift_ = Keyword('>>>')
        self.bit_val = self.false_ ^ self.true_
        self.if_ = Keyword('if')
        self.for_ = Keyword('for')
        self.return_ = Keyword('return')
        self.void_ = Keyword('void')
        # Identifier: any word that is not a reserved keyword.
        # NOTE(review): self.sbox_ appears twice in the NotAny chain, and
        # return_/void_ are not excluded — presumably an oversight; confirm
        # before changing.  ID and ID_ are identical definitions kept
        # separate so they can carry different parse actions (ID gets one
        # below; ID_ stays raw for declaration contexts).
        self.ID = NotAny(self.sbox_ ^ self.int_ ^ self.bit_ ^ self.false_ ^ self.true_ ^ self.if_ ^ self.for_ ^ self.sbox_) + Word(alphas + '_', alphanums + '_')  # NOQA
        self.ID_ = NotAny(self.sbox_ ^ self.int_ ^ self.bit_ ^ self.false_ ^ self.true_ ^ self.if_ ^ self.for_ ^ self.sbox_) + Word(alphas + '_', alphanums + '_')  # NOQA
        # Other Tokens
        self.l_bracket = Literal('(')
        self.r_bracket = Literal(')')
        self.eq_set = Literal('=')("set")
        self.term_st = Literal(';')
        # Numeric literals; binary/hex are normalised to strings by the
        # semantic analyser's parse action.
        self.b_2_num = Combine(Literal("0b") + Word("01"))
        self.b_2_num.setParseAction(self.semantic_analyser.convert_base_to_str)
        self.b_16_num = Combine(Literal("0x") + Word(srange("[0-9a-fA-F]")))
        self.b_16_num.setParseAction(self.semantic_analyser.convert_base_to_str)
        self.b_10_num = Word(nums)
        self.bit_and = Literal('&')
        self.bit_or = Keyword('|')
        self.bit_xor = Keyword('^')
        self.bit_not = Literal('~')
        self.eq_compare = Literal('==')
        self.neq_compare = Literal('!=')
        self.l_brace = Literal('{')
        self.r_brace = Literal('}')
        self.bin_add = Literal('+')
        self.bin_mult = Literal('*')
        self.bin_sub = Literal('-')
        self.bin_mod = Literal('%')
        self.bin_div = Literal('/')
        self.g_than = Literal('>')
        self.ge_than = Literal('>=')
        self.l_than = Literal('<')
        self.le_than = Literal('<=')
        self.log_and = Keyword('&&')
        self.log_or = Keyword('||')
        self.l_sq_b = Literal('[')
        self.r_sq_b = Literal(']')
        # Operator Productions (longest-match so '>=' wins over '>')
        self.log_op = self.log_and ^ self.log_or
        self.comparison_op = self.g_than ^ self.ge_than ^ self.l_than ^ self.le_than ^ self.eq_compare ^ self.neq_compare
        self.arith_op = self.bin_add ^ self.bin_mult ^ self.bin_sub ^ self.bin_mod ^ self.bin_div
        self.bitwise_op = self.bit_and ^ self.bit_or ^ self.bit_xor ^ self.bit_not ^ self.l_shift_ ^ self.r_shift_ ^ self.circ_l_shift_ ^ self.circ_r_shift_
        # Grammar — forward declarations for mutually recursive rules.
        self.stmt = Forward()
        self.for_loop = Forward()
        self.cast = Forward()
        self.seq_val = Forward()
        self.int_value = self.b_2_num ^ self.b_16_num ^ self.b_10_num
        self.expr = Forward()
        self.function_call = Forward()
        self.index_select = Forward()
        self.seq_ = Forward()
        self.operand = Forward()
        self.seq_range = Forward()
        # #######Operands
        # value.Sbox(arg) — ~White() forbids whitespace around the dot and
        # before the argument list.
        self.sbox_call = Group((self.ID ^ self.seq_val) + ~White()
                               + Literal(".") + ~White() + self.sbox_
                               + ~White() + self.l_bracket
                               + (self.ID ^ self.int_value) + self.r_bracket)
        # NOTE(review): this rebinding discards the Forward() created above;
        # anything that captured the old self.operand would not see this.
        self.operand = self.index_select | self.seq_val | self.function_call | self.ID | self.int_value | self.cast | self.bit_val
        # Tag each operand kind with a leading type label for the AST.
        self.seq_val.setParseAction(lambda t: ['Seq_val'] + [t.asList()])
        self.index_select.setParseAction(lambda t: ['index_select'] + [t.asList()])
        self.function_call.setParseAction(lambda t: ['function_call'] + [t.asList()])
        self.ID.setParseAction(lambda t: ['ID'] + [t.asList()])
        self.int_value.setParseAction(lambda t: ['Int_val'] + [t.asList()])
        self.cast.setParseAction(lambda t: ['cast'] + [t.asList()])
        self.bit_val.setParseAction(lambda t: ['Bit_val'] + [t.asList()])
        self.seq_range.setParseAction(lambda t: ['seq_range'] + [t.asList()])
        # #######Expressions
        # Precedence (tightest first): bitwise, comparison, logical, arith.
        self.expr = Group(infixNotation(Group(self.operand),
                                        [(self.bitwise_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
                                         (self.comparison_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
                                         (self.log_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
                                         (self.arith_op, 2, opAssoc.LEFT, self.nest_operand_pairs)]))
        # self.expr.setParseAction(self.expr_p)
        # Int(width) / Sbox(width) type expressions; no space before '('.
        self.int_size = Combine(Optional(Literal("@")) + self.int_)("decl") + ~White() + Suppress(self.l_bracket) + self.expr + Suppress(self.r_bracket)
        self.sbox_size = self.sbox_ + ~White() + Suppress(self.l_bracket) + self.expr + Suppress(self.r_bracket)
        # expr : expr — slice of a sequence.
        self.seq_range << self.expr + Suppress(Literal(":")) + self.expr
        # [e1, e2, ...] — sequence literal (possibly empty).
        self.seq_val << Suppress(self.l_sq_b) + Optional(Group(delimitedList(self.expr))) + Suppress(self.r_sq_b)
        # Sequence type: element type followed by one or more [size] parts.
        self.seq_ << (self.int_size | self.bit_ | self.sbox_size)("type") +\
            Group(OneOrMore(~White() + Suppress(self.l_sq_b) + self.expr + Suppress(self.r_sq_b)))("seq_size")
        self.function_call << self.ID("function_name") + ~White() + Suppress(self.l_bracket) +\
            Optional(Group(delimitedList(self.expr)))("param_list") + Suppress(self.r_bracket)
        # (type) expr — C-style cast.
        self.cast << Suppress(self.l_bracket) + Group((self.seq_ | self.int_size | self.bit_)) +\
            Suppress(self.r_bracket) + (self.expr)("target")
        # id[idx] / (cast)[idx] with one or more [..] groups; each group may
        # hold a comma list of plain expressions or ranges.
        self.index_select << (self.ID("ID") ^ (Suppress(self.l_bracket) + self.cast + Suppress(self.r_bracket))("cast")) + ~White() +\
            Group(OneOrMore(Suppress(self.l_sq_b) + Group(delimitedList(self.expr ^ Group(Group(self.seq_range))))("index") + Suppress(self.r_sq_b)))
        # ####### Declarations
        self.id_set = Group((Group(self.index_select) | self.ID_) + self.eq_set + self.expr)
        self.id_set.setParseAction(self.AST.id_set)
        self.int_decl = Group(self.int_size + delimitedList(Group((self.ID_("ID") + self.eq_set + self.expr("set_value")) | self.ID_("ID")))("value"))  # NOQA
        self.int_decl.setParseAction(self.AST.int_decl)
        self.bit_decl = Group(self.bit_("decl") + delimitedList(Group(self.ID_("ID")) ^ Group(self.ID_("ID") + self.eq_set + self.expr("set_value")))("value"))
        self.bit_decl.setParseAction(self.AST.bit_decl)
        self.seq_decl = Group(self.seq_("decl") + Group(self.ID)("ID") + Optional(self.eq_set + Group(self.expr))("value"))
        self.seq_decl.setParseAction(self.AST.seq_decl)
        self.decl = self.bit_decl ^ self.int_decl ^ self.seq_decl
        # ###### Statements
        self.return_stmt = Group(self.return_ + self.expr)
        self.return_stmt.setParseAction(self.AST.return_stmt)
        # Braces carry parse actions so the AST can open/close scopes.
        self.function_start = Literal("{")
        self.function_start.setParseAction(self.AST.function_start)
        # NOTE(review): function_end is defined but unused — function_decl
        # closes with self.r_brace instead, so no "function end" action fires.
        self.function_end = Literal("}")
        self.function_decl = Group((Group(self.seq_) | Group(self.int_size) | Group(self.bit_) | Group(self.void_))("return_type") + Group(self.ID)("func_ID") + Suppress(self.l_bracket) + Group(Optional(delimitedList(Group((self.seq_ | self.int_size | self.bit_) + Group(self.ID)))))("func_param") +  # NOQA
                                   Suppress(self.r_bracket) + Suppress(self.function_start) + Group(self.stmt)("body") + Suppress(self.r_brace))
        self.function_decl.setParseAction(self.AST.function_decl)
        # for-loop punctuation; each token triggers an AST state change.
        self.for_init = Literal('(')
        self.for_init.setParseAction(self.AST.begin_for)
        self.for_terminator = Literal(';')
        self.for_terminator.setParseAction(self.AST.for_terminator)
        self.for_increment = Literal(';')
        self.for_increment.setParseAction(self.AST.for_increment)
        # NOTE(review): precedence order here differs from self.expr
        # (log_op outranks bitwise_op) — confirm whether that is deliberate.
        self.terminator_expr = Group(infixNotation(Group(self.operand),
                                                   [(self.log_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
                                                    (self.bitwise_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
                                                    (self.comparison_op, 2, opAssoc.LEFT, self.nest_operand_pairs),
                                                    (self.arith_op, 2, opAssoc.LEFT, self.nest_operand_pairs)]))
        self.terminator_expr.setParseAction(self.AST.terminator_expr)
        self.for_body = Literal('{')
        self.for_body.setParseAction(self.AST.for_body)
        self.end_for = Literal('}')
        self.end_for.setParseAction(self.AST.end_for)
        # for (init; cond; increment) { body }
        self.for_loop << Group(self.for_ + ~White() + Suppress(self.for_init) + Optional(delimitedList(self.decl ^ self.id_set))("init") + Suppress(self.for_terminator) + Optional(self.terminator_expr) + Suppress(self.for_increment) + Optional(delimitedList(self.id_set))("increm") + Suppress(self.r_bracket) + Suppress(self.for_body) + self.stmt("loop_body") + Suppress(self.end_for))
        self.if_condition = Suppress(self.l_bracket) + self.expr + Suppress(self.r_bracket)
        self.if_condition.setParseAction(self.AST.if_cond)
        self.if_.setParseAction(self.AST.begin_if)
        self.if_body_st = Literal('{')
        self.if_body_st.setParseAction(self.AST.if_body_st)
        self.if_body_end = Literal('}')
        self.if_body_end.setParseAction(self.AST.if_body_end)
        self.if_stmt = Group(self.if_ + self.if_condition("if_cond") + Suppress(self.if_body_st) + Group(self.stmt).setResultsName("body") + Suppress(self.if_body_end))
        self.single_expr = self.expr + Suppress(self.term_st)
        self.single_expr.setParseAction(self.AST.stand_alone_expr)
        # A statement block: declarations/assignments/expressions need ';',
        # compound statements (function/for/if) do not.
        self.stmt << ZeroOrMore(self.decl + Suppress(self.term_st)
                                ^ self.function_decl
                                ^ self.id_set + Suppress(self.term_st)
                                ^ self.single_expr
                                ^ self.for_loop
                                ^ self.if_stmt
                                ^ self.return_stmt + Suppress(self.term_st)
                                ^ self.sbox_call + Suppress(self.term_st))
        self.grammar_test = self.stmt + StringEnd()
        # Allows single statements to be parsed
        self.grammar = ZeroOrMore(self.function_decl ^ self.seq_decl + Suppress(self.term_st)) + StringEnd()

    def nest_operand_pairs(self, tokens):
        """infixNotation parse action: re-nest a flat left-associative
        chain ``a op b op c ...`` into binary pairs ``((a op b) op c)``."""
        tokens = tokens[0]
        ret = ParseResults(tokens[:3])
        remaining = iter(tokens[3:])
        done = False
        while not done:
            next_pair = (next(remaining, None), next(remaining, None))
            if next_pair == (None, None):
                done = True
                break
            # Wrap what we have so far, then append the next (op, operand).
            ret = ParseResults([ret])
            ret += ParseResults(list(next_pair))
        return [ret]

    @property
    def AST(self):
        return self._AST

    @AST.setter
    def AST(self, value):
        self._AST = value

    def analyse_tree_test(self, AST):
        """Run semantic analysis over an externally supplied AST."""
        return self.semantic_analyser.analyse(AST)

    def parse_test_unit(self, data_in):
        """Parse a single statement; return [results, True] or False on a
        parse error (the error is printed, not raised)."""
        try:
            res = self.grammar_test.parseString(data_in)
        except ParseException as details:
            print("The following error occured:")
            print(details)
            return False
        if type(res[0]) is not bool:
            pass
            # print(res[0].dump())
        return [res, True]

    def parse_test_AST_semantic(self, data_in):
        """Parse a single statement and return the resulting AST, or False
        on a parse error."""
        try:
            self.grammar_test.parseString(data_in)
        except ParseException as details:
            print("The following error occured:")
            print(details)
            return False
        return self.AST

    def parse_test_integration(self, data_in):
        """Parse statements only inside function/sequence declarations;
        return [results, True] or False on a parse error."""
        try:
            res = self.grammar.parseString(data_in)
        except ParseException as details:
            print("The following error occured:")
            print(details)
            return False
        # if type(res[0]) is not bool:
        #     print(res[0].dump())
        return [res, True]

    def parse(self, data_in):
        """Production entry point: parse, then run full semantic analysis;
        on success return the translated IR (None otherwise)."""
        self.grammar.parseString(data_in)
        if self.semantic_analyser.analyse(self.AST, True) is True:
            return self.semantic_analyser.IR.translate()
def _build_asn1_grammar():
    """Build and return the pyparsing grammar for ASN.1 module definitions.

    Returns a parser matching one or more ``ModuleDefinition`` blocks; parse
    results are wrapped in ``AnnotatedToken`` tags via ``annotate``.
    The numbered 'todo' comments refer to sections of the X.680 standard.
    NOTE(review): ``Unique`` and ``StringOf`` are project helpers defined
    outside this chunk.
    """
    def build_identifier(prefix_pattern):
        # An identifier is one char from prefix_pattern followed by an
        # optional run of letters/digits/hyphens.
        identifier_suffix = Optional(Word(srange('[-0-9a-zA-Z]')))
        # todo: more rigorous? trailing hyphens and -- forbidden
        return Combine(Word(srange(prefix_pattern), exact=1) + identifier_suffix)

    def braced_list(element_rule):
        # '{ e, e, ... }' with the elements collected into one Group.
        elements_rule = Optional(delimitedList(element_rule))
        return Suppress('{') + Group(elements_rule) + Suppress('}')

    def annotate(name):
        # Parse-action factory: tag the token list with an AST node name.
        def annotation(t):
            return AnnotatedToken(name, t.asList())
        return annotation

    # Reserved words
    # NOTE(review): multi-word Keywords ('DEFINED BY', 'BIT STRING', ...)
    # only match with exactly one space between the words — confirm that is
    # acceptable for the inputs this grammar sees.
    ANY = Keyword('ANY')
    DEFINED_BY = Keyword('DEFINED BY')
    DEFINITIONS = Keyword('DEFINITIONS')
    BEGIN = Keyword('BEGIN')
    END = Keyword('END')
    OPTIONAL = Keyword('OPTIONAL')
    DEFAULT = Keyword('DEFAULT')
    TRUE = Keyword('TRUE')
    FALSE = Keyword('FALSE')
    UNIVERSAL = Keyword('UNIVERSAL')
    APPLICATION = Keyword('APPLICATION')
    PRIVATE = Keyword('PRIVATE')
    MIN = Keyword('MIN')
    MAX = Keyword('MAX')
    IMPLICIT = Keyword('IMPLICIT')
    EXPLICIT = Keyword('EXPLICIT')
    EXPLICIT_TAGS = Keyword('EXPLICIT TAGS')
    IMPLICIT_TAGS = Keyword('IMPLICIT TAGS')
    AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS')
    EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')
    COMPONENTS_OF = Keyword('COMPONENTS OF')
    ELLIPSIS = Keyword('...')
    SIZE = Keyword('SIZE')
    OF = Keyword('OF')
    IMPORTS = Keyword('IMPORTS')
    EXPORTS = Keyword('EXPORTS')
    FROM = Keyword('FROM')

    # Built-in types
    SEQUENCE = Keyword('SEQUENCE')
    SET = Keyword('SET')
    CHOICE = Keyword('CHOICE')
    ENUMERATED = Keyword('ENUMERATED')
    BIT_STRING = Keyword('BIT STRING')
    BOOLEAN = Keyword('BOOLEAN')
    REAL = Keyword('REAL')
    OCTET_STRING = Keyword('OCTET STRING')
    CHARACTER_STRING = Keyword('CHARACTER STRING')
    NULL = Keyword('NULL')
    INTEGER = Keyword('INTEGER')
    OBJECT_IDENTIFIER = Keyword('OBJECT IDENTIFIER')

    # Restricted string types
    BMPString = Keyword('BMPString')
    GeneralString = Keyword('GeneralString')
    GraphicString = Keyword('GraphicString')
    IA5String = Keyword('IA5String')
    ISO646String = Keyword('ISO646String')
    NumericString = Keyword('NumericString')
    PrintableString = Keyword('PrintableString')
    TeletexString = Keyword('TeletexString')
    T61String = Keyword('T61String')
    UniversalString = Keyword('UniversalString')
    UTF8String = Keyword('UTF8String')
    VideotexString = Keyword('VideotexString')
    VisibleString = Keyword('VisibleString')

    # Useful types
    GeneralizedTime = Keyword('GeneralizedTime')
    UTCTime = Keyword('UTCTime')
    ObjectDescriptor = Keyword('ObjectDescriptor')

    # Literals
    number = Word(nums)
    signed_number = Combine(Optional('-') + number)  # todo: consider defined values from 18.1
    # '0101'B and 'DEADBEEF'H string literals.
    bstring = Suppress('\'') + StringOf('01') + Suppress('\'B')
    hstring = Suppress('\'') + StringOf('0123456789ABCDEF') + Suppress('\'H')

    # Comments: ASN.1 '-- ... --' (or to end of line) and C-style.
    hyphen_comment = Regex(r"--[\s\S]*?(--|$)", flags=re.MULTILINE)
    comment = hyphen_comment | cStyleComment

    # identifier
    identifier = build_identifier('[a-z]')

    # references
    # these are duplicated to force unique token annotations
    valuereference = build_identifier('[a-z]')
    typereference = build_identifier('[A-Z]')
    module_reference = build_identifier('[A-Z]')
    reference = valuereference | typereference  # TODO: consider object references from 12.1

    # values
    # todo: consider more literals from 16.9
    boolean_value = TRUE | FALSE
    bitstring_value = bstring | hstring  # todo: consider more forms from 21.9
    integer_value = signed_number
    null_value = NULL
    cstring_value = dblQuotedString

    exponent = CaselessLiteral('e') + signed_number
    real_value = Combine(signed_number + Optional(Literal('.') + Optional(number)) + Optional(exponent))

    # In value range constraints, decimal points must be followed by number, or
    # the grammar becomes ambiguous: ([1.].100) vs ([1]..[100])
    constraint_real_value = Combine(signed_number + Optional(Literal('.') + number) + Optional(exponent))

    builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value | cstring_value
    external_value_reference = module_reference + Suppress('.') + valuereference
    defined_value = external_value_reference | valuereference  # todo: more options from 13.1
    referenced_value = Unique(defined_value)  # todo: more options from 16.11

    # object identifier value
    name_form = Unique(identifier)
    number_form = Unique(number)
    name_and_number_form = name_form + Suppress('(') + number_form + Suppress(')')
    objid_components = name_and_number_form | name_form | number_form | defined_value
    objid_components_list = OneOrMore(objid_components)
    object_identifier_value = Suppress('{') + \
        (objid_components_list | (defined_value + objid_components_list)) + \
        Suppress('}')

    value = builtin_value | referenced_value | object_identifier_value

    # definitive identifier value
    definitive_number_form = Unique(number)
    definitive_name_and_number_form = name_form + Suppress('(') + definitive_number_form + Suppress(')')
    definitive_objid_component = definitive_name_and_number_form | name_form | definitive_number_form
    definitive_objid_component_list = OneOrMore(definitive_objid_component)
    definitive_identifier = Optional(Suppress('{') + definitive_objid_component_list + Suppress('}'))

    # tags
    class_ = UNIVERSAL | APPLICATION | PRIVATE
    class_number = Unique(number)  # todo: consider defined values from 30.1
    tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
    tag_default = EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS

    # extensions
    extension_default = Unique(EXTENSIBILITY_IMPLIED)

    # values
    # Forward-declare these, they can only be fully defined once
    # we have all types defined. There are some circular dependencies.
    named_type = Forward()
    type_ = Forward()

    # constraints
    # todo: consider the full subtype and general constraint syntax described in 45.*
    lower_bound = (constraint_real_value | signed_number | referenced_value | MIN)
    upper_bound = (constraint_real_value | signed_number | referenced_value | MAX)
    single_value_constraint = Suppress('(') + value + Suppress(')')
    value_range_constraint = Suppress('(') + lower_bound + Suppress('..') + upper_bound + Suppress(')')
    # TODO: Include contained subtype constraint here if we ever implement it.
    size_constraint = Optional(Suppress('(')) + Suppress(SIZE) + (single_value_constraint | value_range_constraint) + Optional(Suppress(')'))

    # types
    # todo: consider other defined types from 13.1
    defined_type = Optional(module_reference + Suppress('.'), default=None) + typereference + Optional(size_constraint, default=None)

    # TODO: consider exception syntax from 24.1
    extension_marker = Unique(ELLIPSIS)

    component_type_optional = named_type + Suppress(OPTIONAL)
    component_type_default = named_type + Suppress(DEFAULT) + value
    component_type_components_of = Suppress(COMPONENTS_OF) + type_
    component_type = component_type_components_of | component_type_optional | component_type_default | named_type

    tagged_type = tag + Optional(IMPLICIT | EXPLICIT, default=None) + type_

    named_number_value = Suppress('(') + signed_number + Suppress(')')
    named_number = identifier + named_number_value
    named_nonumber = Unique(identifier)
    enumeration = named_number | named_nonumber

    set_type = SET + braced_list(component_type | extension_marker)
    sequence_type = SEQUENCE + braced_list(component_type | extension_marker)
    sequenceof_type = Suppress(SEQUENCE) + Optional(size_constraint, default=None) + Suppress(OF) + (type_ | named_type)
    setof_type = Suppress(SET) + Optional(size_constraint, default=None) + Suppress(OF) + (type_ | named_type)
    choice_type = CHOICE + braced_list(named_type | extension_marker)
    selection_type = identifier + Suppress('<') + type_
    enumerated_type = ENUMERATED + braced_list(enumeration | extension_marker)
    bitstring_type = BIT_STRING + Optional(braced_list(named_number), default=[]) + Optional(single_value_constraint | size_constraint, default=None)
    plain_integer_type = INTEGER
    restricted_integer_type = INTEGER + braced_list(named_number) + Optional(single_value_constraint, default=None)
    boolean_type = BOOLEAN
    real_type = REAL
    null_type = NULL
    object_identifier_type = OBJECT_IDENTIFIER
    octetstring_type = OCTET_STRING + Optional(size_constraint)
    unrestricted_characterstring_type = CHARACTER_STRING
    restricted_characterstring_type = BMPString | GeneralString | \
        GraphicString | IA5String | \
        ISO646String | NumericString | \
        PrintableString | TeletexString | \
        T61String | UniversalString | \
        UTF8String | VideotexString | \
        VisibleString
    characterstring_type = (restricted_characterstring_type | unrestricted_characterstring_type) + Optional(size_constraint)
    useful_type = GeneralizedTime | UTCTime | ObjectDescriptor

    # ANY type
    any_type = ANY + Optional(Suppress(DEFINED_BY + identifier))

    # todo: consider other builtins from 16.2
    simple_type = (any_type | boolean_type | null_type | octetstring_type | characterstring_type | real_type | plain_integer_type | object_identifier_type | useful_type) + Optional(value_range_constraint | single_value_constraint)
    constructed_type = choice_type | sequence_type | set_type
    value_list_type = restricted_integer_type | enumerated_type
    builtin_type = value_list_type | tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | bitstring_type
    referenced_type = defined_type | selection_type  # todo: consider other ref:d types from 16.3

    # Tie the forward-declared recursive rules together.
    type_ << (builtin_type | referenced_type)
    named_type << (identifier + type_)

    type_assignment = typereference + '::=' + type_
    value_assignment = valuereference + type_ + '::=' + value
    assignment = type_assignment | value_assignment
    assignment_list = ZeroOrMore(assignment)

    # TODO: Maybe handle full assigned-identifier syntax with defined values
    # described in 12.1, but I haven't been able to find examples of it, and I
    # can't say for sure what acceptable syntax is.
    global_module_reference = module_reference + Optional(object_identifier_value, default=None)

    symbol = Unique(reference)  # TODO: parameterized reference?
    symbol_list = delimitedList(symbol, delim=',')
    symbols_from_module = Group(Group(symbol_list) + Suppress(FROM) + global_module_reference)
    symbols_from_module_list = OneOrMore(symbols_from_module)
    symbols_imported = Unique(symbols_from_module_list)
    exports = Suppress(EXPORTS) + Optional(symbol_list) + Suppress(';')
    imports = Suppress(IMPORTS) + Optional(symbols_imported) + Suppress(';')

    module_body = Optional(exports, default=None) + Optional(imports, default=None) + assignment_list
    module_identifier = module_reference + definitive_identifier
    module_definition = module_identifier + Suppress(DEFINITIONS) + Optional(tag_default, default=None) + \
        Optional(extension_default, default=None) + Suppress('::=') + \
        Suppress(BEGIN) + module_body + Suppress(END)

    module_definition.ignore(comment)

    # Mark up the parse results with token tags
    identifier.setParseAction(annotate('Identifier'))
    named_number_value.setParseAction(annotate('Value'))
    tag.setParseAction(annotate('Tag'))
    class_.setParseAction(annotate('TagClass'))
    class_number.setParseAction(annotate('TagClassNumber'))
    type_.setParseAction(annotate('Type'))
    simple_type.setParseAction(annotate('SimpleType'))
    choice_type.setParseAction(annotate('ChoiceType'))
    sequence_type.setParseAction(annotate('SequenceType'))
    set_type.setParseAction(annotate('SetType'))
    value_list_type.setParseAction(annotate('ValueListType'))
    bitstring_type.setParseAction(annotate('BitStringType'))
    sequenceof_type.setParseAction(annotate('SequenceOfType'))
    setof_type.setParseAction(annotate('SetOfType'))
    named_number.setParseAction(annotate('NamedValue'))
    named_nonumber.setParseAction(annotate('NamedValue'))
    single_value_constraint.setParseAction(annotate('SingleValueConstraint'))
    size_constraint.setParseAction(annotate('SizeConstraint'))
    value_range_constraint.setParseAction(annotate('ValueRangeConstraint'))
    component_type.setParseAction(annotate('ComponentType'))
    component_type_optional.setParseAction(annotate('ComponentTypeOptional'))
    component_type_default.setParseAction(annotate('ComponentTypeDefault'))
    component_type_components_of.setParseAction(annotate('ComponentTypeComponentsOf'))
    tagged_type.setParseAction(annotate('TaggedType'))
    named_type.setParseAction(annotate('NamedType'))
    type_assignment.setParseAction(annotate('TypeAssignment'))
    value_assignment.setParseAction(annotate('ValueAssignment'))
    module_reference.setParseAction(annotate('ModuleReference'))
    global_module_reference.setParseAction(annotate('GlobalModuleReference'))
    module_body.setParseAction(annotate('ModuleBody'))
    module_definition.setParseAction(annotate('ModuleDefinition'))
    extension_marker.setParseAction(annotate('ExtensionMarker'))
    name_form.setParseAction(annotate('NameForm'))
    number_form.setParseAction(annotate('NumberForm'))
    name_and_number_form.setParseAction(annotate('NameAndNumberForm'))
    object_identifier_value.setParseAction(annotate('ObjectIdentifierValue'))
    definitive_identifier.setParseAction(annotate('DefinitiveIdentifier'))
    definitive_number_form.setParseAction(annotate('DefinitiveNumberForm'))
    definitive_name_and_number_form.setParseAction(annotate('DefinitiveNameAndNumberForm'))
    exports.setParseAction(annotate('Exports'))
    imports.setParseAction(annotate('Imports'))
    assignment_list.setParseAction(annotate('AssignmentList'))
    bstring.setParseAction(annotate('BinaryStringValue'))
    hstring.setParseAction(annotate('HexStringValue'))
    defined_type.setParseAction(annotate('DefinedType'))
    selection_type.setParseAction(annotate('SelectionType'))
    referenced_value.setParseAction(annotate('ReferencedValue'))

    # A file may contain several module definitions.
    start = OneOrMore(module_definition)
    return start
data = single | tuple_ # should not match a single (tr) simple_data = Group( NotAny('(tr)') + data + ZeroOrMore(Optional(Suppress(',')) + data)) # the first element of a set data record cannot be 'dimen', or else # these would match set_def_stmts non_dimen_simple_data = ~Literal('dimen') + simple_data matrix_row = Group(single + OneOrMore(PLUS | MINUS)) matrix_data = ":" + OneOrMore(single).setResultsName('columns') \ + ":=" + OneOrMore(matrix_row).setResultsName('data') matrix_data.setParseAction(MatrixData) tr_matrix_data = Suppress("(tr)") + matrix_data tr_matrix_data.setParseAction(mark_transposed) set_slice_component = number | symbol | '*' set_slice_record = LPAREN + NotAny('tr') + delimitedList( set_slice_component) + RPAREN set_slice_record.setParseAction(SliceRecord) _set_record = set_slice_record | matrix_data | tr_matrix_data | Suppress( ":=") set_record = simple_data | _set_record non_dimen_set_record = non_dimen_simple_data | _set_record set_def_stmt = "set" + symbol + Optional(subscript_domain) + \ Optional("dimen" + integer.setResultsName('dimen')) + END set_def_stmt.setParseAction(SetDefStmt)
# NOTE(review): fragment of an RFC 2254-style LDAP filter grammar; the
# `pureldap.LDAPFilter_*` classes come from outside this chunk.

# Attribute description: letters, then letters/digits/';'/'-'.
attr = Word(string.ascii_letters,
            string.ascii_letters + string.digits + ';-',)
attr.leaveWhitespace()
attr.setName('attr')

# Exactly two hex digits (shadows the `string.hexdigits` constant locally).
hexdigits = Word(string.hexdigits, exact=2)
hexdigits.setName('hexdigits')

# "\XX" escape; parse action decodes it to the corresponding character.
escaped = Suppress(Literal('\\')) + hexdigits
escaped.setName('escaped')


def _p_escaped(s, l, t):
    # t[0] is the two hex digits; return the decoded character.
    text = t[0]
    return chr(int(text, 16))


escaped.setParseAction(_p_escaped)

# Assertion value: any run of characters excluding '*', '(', ')', '\' and
# NUL, with escapes decoded and the pieces recombined.
value = Combine(OneOrMore(CharsNotIn('*()\\\0') | escaped))
value.setName('value')

# Each operator's parse action yields the matching LDAP filter class
# (the class itself, not an instance — instantiation happens downstream).
equal = Literal("=")
equal.setParseAction(lambda s, l, t: pureldap.LDAPFilter_equalityMatch)
approx = Literal("~=")
approx.setParseAction(lambda s, l, t: pureldap.LDAPFilter_approxMatch)
greater = Literal(">=")
greater.setParseAction(lambda s, l, t: pureldap.LDAPFilter_greaterOrEqual)
less = Literal("<=")
less.setParseAction(lambda s, l, t: pureldap.LDAPFilter_lessOrEqual)
filtertype = equal | approx | greater | less
filtertype.setName('filtertype')

# simple ::= attr filtertype value   (no whitespace between parts)
simple = attr + filtertype + value
simple.leaveWhitespace()
simple.setName('simple')
# NOTE(review): fragment of a protobuf (proto2-style) grammar; MESSAGE,
# LBRACE/RBRACE, EQ, SEMI, `identifier`, `integer` and the *_fn parse
# actions are defined outside this chunk.

# Reserved words
RETURNS = Keyword('returns')
SERVICE = Keyword('service')
OPTION = Keyword('option')
ENUM = Keyword('enum')
ONEOF = Keyword('oneof')
REQUIRED = Keyword('required')
OPTIONAL = Keyword('optional')
REPEATED = Keyword('repeated')
TRUE = Keyword('true')
FALSE = Keyword('false')

message_body = Forward()
# '-' (And with error stop): once MESSAGE matched, the rest must follow or
# a syntax error is raised rather than backtracking.
message_definition = Suppress(MESSAGE) - identifier("message_id") + Suppress(LBRACE) + message_body("message_body") + Suppress(RBRACE)
message_definition.setParseAction(message_definition_fn)
enum_definition = ENUM - identifier + LBRACE + ZeroOrMore(Group(identifier + EQ + integer + SEMI)) + RBRACE

# Scalar type keywords
DOUBLE = Keyword("double")
INT32 = Keyword("int32")
UINT32 = Keyword("uint32")
BOOL = Keyword("bool")
STRING = Keyword("string")
# NOTE(review): INT32 is defined but missing from this alternation; "int32"
# still parses via the trailing `identifier` branch, but is not recognised
# as a keyword — confirm whether that is intended.
type_ = (DOUBLE | UINT32 | BOOL | STRING | identifier)
type_.setParseAction(type_fn)

qualifier = (REQUIRED | OPTIONAL | REPEATED)("qualifier")
qualifier.setParseAction(qualifier_fn)

# field ::= qualifier type name = number ;
field = qualifier - type_("type_") + identifier("identifier") + EQ + integer("field_number") + SEMI
def parse(string=None, filename=None, token=None, lang=None):
    """Parse a Gherkin feature from a string or a file.

    Builds the full feature-file grammar, then parses either ``string`` or
    the file at ``filename``.  ``token`` may name one of the grammar rules
    defined locally (e.g. 'TABLE') to parse just that construct; by default
    the whole FEATURE rule is used.  Raises LettuceSyntaxError on failure.
    """
    if not lang:
        # Language (keyword set) is guessed from the input when not given.
        lang = guess_language(string, filename)

    #
    # End of Line
    #
    EOL = Suppress(lineEnd)
    UTFWORD = Word(unicodePrintables)

    #
    # @tag
    #
    TAG = Suppress('@') + UTFWORD

    #
    # A table
    #
    # A table is made up of rows of cells, e.g.
    #
    #   | column 1 | column 2 |
    #
    # Table cells need to be able to handle escaped tokens such as \| and \n
    #
    def handle_esc_char(tokens):
        # Decode the two-character escapes a cell may contain.
        token = tokens[0]

        if token == r'\|':
            return u'|'
        elif token == r'\n':
            return u'\n'
        elif token == r'\\':
            return u'\\'

        raise NotImplementedError(u"Unknown token: %s" % token)

    ESC_CHAR = Word(initChars=r'\\', bodyChars=unicodePrintables, exact=2)
    ESC_CHAR.setParseAction(handle_esc_char)

    #
    # A cell can contain anything except a cell marker, new line or the
    # beginning of a cell marker, we then handle escape characters separately
    # and recombine the cell afterwards
    #
    CELL = OneOrMore(CharsNotIn('|\n\\') + Optional(ESC_CHAR))
    CELL.setParseAction(lambda tokens: u''.join(tokens))

    TABLE_ROW = Suppress('|') + OneOrMore(CELL + Suppress('|')) + EOL
    TABLE_ROW.setParseAction(lambda tokens: [v.strip() for v in tokens])
    TABLE = Group(OneOrMore(Group(TABLE_ROW)))

    #
    # Multiline string
    #
    def clean_multiline_string(s, loc, tokens):
        """Clean a triple-quoted multiline string.

        The indent level of a multiline string is the indent level of the
        triple-quote; it is derived by walking backwards from the token
        location to the preceding newline.  Leading/trailing newlines are
        stripped.

        FIXME: assumes UNIX newlines
        NOTE(review): an empty multiline string would make multiline[0]
        raise IndexError — confirm the grammar cannot produce one.
        """
        def remove_indent(multiline, indent):
            """Yield the lines with the common indent removed."""
            for line in multiline.splitlines():
                if line and not line[:indent].isspace():
                    warn("%s: %s: under-indented multiline string "
                         "truncated: '%s'" %
                         (lineno(loc, s), col(loc, s), line),
                         LettuceSyntaxWarning)

                # for those who are surprised by this, slicing a string
                # shorter than indent will yield empty string, not IndexError
                yield line[indent:]

        # determine the indentation offset
        indent = loc - s.rfind('\n', 0, loc) - 1

        multiline = '\n'.join(remove_indent(tokens[0], indent))

        # remove leading and trailing newlines
        if multiline[0] == '\n':
            multiline = multiline[1:]

        if multiline[-1] == '\n':
            multiline = multiline[:-1]

        return multiline

    MULTILINE = QuotedString('"""', multiline=True)
    MULTILINE.setParseAction(clean_multiline_string)

    # A Step
    #
    # Steps begin with a keyword such as Given, When, Then or And They can
    # contain an optional inline comment, although it's possible to encapsulate
    # it in a string. Finally they can contain a table or a multiline 'Python'
    # string.
    #
    # <variables> are not parsed as part of the grammar as it's not easy to
    # distinguish between a variable and XML. Instead scenarios will replace
    # instances in the steps based on the outline keys.
    #
    STATEMENT_SENTENCE = Group(
        lang.STATEMENT +  # Given, When, Then, And
        OneOrMore(UTFWORD.setWhitespaceChars(' \t') |
                  quotedString.setWhitespaceChars(' \t')) +
        EOL
    )

    STATEMENT = Group(
        STATEMENT_SENTENCE('sentence') +
        Optional(TABLE('table') | MULTILINE('multiline'))
    )
    STATEMENT.setParseAction(Step)

    STATEMENTS = Group(ZeroOrMore(STATEMENT))

    #
    # Background:
    #
    BACKGROUND_DEFN = \
        lang.BACKGROUND('keyword') + Suppress(':') + EOL
    BACKGROUND_DEFN.setParseAction(Background)

    BACKGROUND = Group(
        BACKGROUND_DEFN('node') +
        STATEMENTS('statements')
    )
    BACKGROUND.setParseAction(Background.add_statements)

    #
    # Scenario: description
    #
    SCENARIO_DEFN = Group(
        Group(ZeroOrMore(TAG))('tags') +
        lang.SCENARIO('keyword') + Suppress(':') +
        restOfLine('name') + EOL
    )
    SCENARIO_DEFN.setParseAction(Scenario)

    SCENARIO = Group(
        SCENARIO_DEFN('node') +
        STATEMENTS('statements') +
        Group(ZeroOrMore(
            Suppress(lang.EXAMPLES + ':') + EOL + TABLE
        ))('outlines')
    )
    SCENARIO.setParseAction(Scenario.add_statements)

    #
    # Feature: description
    #
    FEATURE_DEFN = Group(
        Group(ZeroOrMore(TAG))('tags') +
        lang.FEATURE('keyword') + Suppress(':') +
        restOfLine('name') + EOL
    )
    FEATURE_DEFN.setParseAction(Feature)

    #
    # A description composed of zero or more lines, before the
    # Background/Scenario block
    #
    DESCRIPTION_LINE = Group(
        ~BACKGROUND_DEFN + ~SCENARIO_DEFN +
        OneOrMore(UTFWORD).setWhitespaceChars(' \t') +
        EOL
    )
    DESCRIPTION = Group(ZeroOrMore(DESCRIPTION_LINE | EOL))
    DESCRIPTION.setParseAction(Description)

    #
    # Complete feature file definition
    #
    FEATURE = Group(
        FEATURE_DEFN('node') +
        DESCRIPTION('description') +
        Optional(BACKGROUND('background')) +
        Group(OneOrMore(SCENARIO))('scenarios') +
        stringEnd)
    FEATURE.ignore(pythonStyleComment)
    FEATURE.setParseAction(Feature.add_blocks)

    #
    # Try parsing the string
    #
    if not token:
        token = FEATURE
    else:
        # NOTE(review): fragile — looks the rule up by name among this
        # function's locals; an unknown name raises KeyError, not a
        # LettuceSyntaxError.
        token = locals()[token]

    try:
        if string:
            tokens = token.parseString(string)
        elif filename:
            # NOTE(review): the three-positional-argument form is only valid
            # if `open` here is `codecs.open` (builtin open's third argument
            # is buffering, not encoding) — confirm the module's imports.
            with open(filename, 'r', 'utf-8') as fp:
                tokens = token.parseFile(fp)
        else:
            raise RuntimeError("Must pass string or filename")

        return tokens
    except ParseException as e:
        if e.parserElement == stringEnd:
            msg = "Expected EOF (max one feature per file)"
        else:
            msg = e.msg

        raise LettuceSyntaxError(
            filename,
            u"{lineno}:{col} Syntax Error: {msg}\n{line}\n{space}^".format(
                msg=msg,
                lineno=e.lineno,
                col=e.col,
                line=e.line,
                space=' ' * (e.col - 1)))
    except LettuceSyntaxError as e:
        # reraise the exception with the filename
        raise LettuceSyntaxError(filename, e.string)
def build_jimple_parser(self):
    """Build and return the pyparsing grammar for the Jimple IR.

    The returned element matches a complete Jimple class (or interface)
    declaration.  Every sub-production is wired to the corresponding
    ``*_parse_action`` callback on ``self``, so parsing a file drives the
    construction of the caller's IR objects as a side effect.
    """
    # --- Operators -------------------------------------------------------
    op_add = Literal("+")
    op_sub = Literal("-")
    op_mul = Literal("*")
    op_div = Literal("/")
    op_xor = Literal("^")
    op_lt = Literal("<")
    op_gt = Literal(">")
    op_eq = Literal("==")
    op_neq = Literal("!=")
    op_lte = Literal("<=")
    op_gte = Literal(">=")
    op_sls = Literal("<<")
    op_srs = Literal(">>")
    op_urs = Literal(">>>")
    op_mod = Literal("%")
    op_rem = Literal("rem")
    op_bwa = Literal("&")
    op_bwo = Literal("|")
    op_cmp = Literal("cmp")
    op_cmpg = Literal("cmpg")
    op_cmpl = Literal("cmpl")

    # --- Punctuation (suppressed from parse results) ---------------------
    lit_lcb = Literal("{").suppress()
    lit_rcb = Literal("}").suppress()
    lit_lp = Literal("(").suppress()
    lit_rp = Literal(")").suppress()
    lit_dot = Literal(".").suppress()
    lit_asgn = Literal("=").suppress()
    lit_ident = Literal(":=").suppress()
    lit_strm = Literal(";").suppress()
    lit_cln = Literal(":").suppress()
    lit_lsb = Literal("[").suppress()
    lit_rsb = Literal("]").suppress()

    # '^' (Or) tries every alternative and keeps the longest match, so
    # e.g. "<=" wins over "<" regardless of listing order.
    binop = (op_add ^ op_sub ^ op_mul ^ op_div ^ op_xor
             ^ op_bwa ^ op_mod ^ op_rem ^ op_urs ^ op_lte
             ^ op_gte ^ op_sls ^ op_srs ^ op_lt ^ op_gt
             ^ op_eq ^ op_neq ^ op_bwo ^ op_cmp ^ op_cmpg
             ^ op_cmpl)
    cond_op = op_gte ^ op_lte ^ op_lt ^ op_gt ^ op_eq ^ op_neq

    # --- Keywords --------------------------------------------------------
    kw_specialinvoke = Keyword("specialinvoke")
    kw_interfaceinvoke = Keyword("interfaceinvoke")
    kw_virtualinvoke = Keyword("virtualinvoke")
    kw_staticinvoke = Keyword("staticinvoke")
    kw_instanceof = Keyword("instanceof")
    kw_new = Keyword("new")
    kw_newarray = Keyword("newarray")
    kw_newmultiarray = Keyword("newmultiarray")
    kw_length = Keyword("lengthof")
    kw_neg = Keyword("neg")
    kw_goto = Keyword("goto")
    kw_if = Keyword("if")
    kw_this = Keyword("@this")
    kw_caughtexception = Keyword("@caughtexception")
    kw_lookupswitch = Keyword("lookupswitch")
    kw_case = Keyword("case")
    kw_default = Keyword("default")
    kw_return = Keyword("return")
    kw_entermonitor = Keyword("entermonitor")
    kw_exitmonitor = Keyword("exitmonitor")
    kw_throw = Keyword("throw")
    kw_throws = Keyword("throws")
    kw_catch = Keyword("catch")
    kw_transient = Keyword("transient")
    kw_from = Keyword("from")
    kw_to = Keyword("to")
    kw_with = Keyword("with")
    kw_breakpoint = Keyword("breakpoint")
    kw_nop = Keyword("nop")
    kw_public = Keyword("public")
    kw_protected = Keyword("protected")
    kw_private = Keyword("private")
    kw_volatile = Keyword("volatile")
    kw_static = Keyword("static")
    kw_annotation = Keyword("annotation")
    kw_final = Keyword("final")
    kw_class = Keyword("class")
    kw_enum = Keyword("enum")
    kw_interface = Keyword("interface")
    kw_abstract = Keyword("abstract")
    kw_extends = Keyword("extends")
    kw_implements = Keyword("implements")
    kw_null = Keyword("null")

    modifier = \
        kw_public | kw_protected | kw_private \
        | kw_static | kw_abstract | kw_final \
        | kw_volatile | kw_enum | kw_transient \
        | kw_annotation

    # --- Identifiers -----------------------------------------------------
    # Locals look like "$r0" / "i1": optional '$', letters, then digits.
    id_local = Combine(Optional(Literal("$")) + Word(alphas) + Word(nums))
    id_java = Word(alphas + "'$_", alphanums + "'$_")
    id_class_comp = Word(alphas + "_", alphanums + "$_")
    # Dotted class name with optional array suffix, e.g. "java.lang.String[]".
    id_type = Combine(id_class_comp
                      + ZeroOrMore(Combine(Literal(".") + (id_class_comp)))
                      + Optional(Word("[]")))
    # FIX: the special names must be exact literals.  The previous
    # Word("<clinit>") / Word("<init>") built character *sets*, which
    # matched any run of those characters (e.g. "init" or "<<<").
    id_method_name = id_java | Literal("<clinit>") | Literal("<init>")
    id_label = Combine(Literal("label") + Word(nums))
    id_parameter = Combine(Literal("@parameter") + Word(nums))

    # --- Field specifier: <DeclaringClass: FieldType fieldName> ----------
    field_specifier = \
        Suppress(Literal("<")) \
        + id_type + lit_cln + id_type + id_java \
        + Suppress(Literal(">"))
    field_specifier.setParseAction(self.field_specifier_parse_action)

    # --- Method specifier: <DeclaringClass: RetType name(params)> --------
    method_param_list = delimitedList(id_type, delim=",")
    id_method = \
        Suppress(Literal("<")) \
        + id_type + lit_cln + id_type + id_method_name \
        + lit_lp + Group(Optional(method_param_list)) + lit_rp \
        + Suppress(Literal(">"))

    number_suffix = Optional(Literal("F") | Literal("L"))

    # Numeric constant: decimal/float with optional exponent and F/L
    # suffix, or the special #Infinity / #-Infinity / #NaN forms.
    expr_number = \
        Combine(
            Word("+-" + nums, nums)
            + Optional(Literal(".") + Optional(Word(nums)))
            + Optional(Literal("E") + Optional(Word("+-")) + Word(nums))
            + number_suffix) \
        | Combine(Literal("#Infinity") + number_suffix) \
        | Combine(Literal("#-Infinity") + number_suffix) \
        | Combine(Literal("#NaN") + number_suffix)
    expr_number.setParseAction(self.expr_numeric_const_parse_action)

    # String constant
    expr_str = QuotedString(quoteChar='"', escChar="\\")
    expr_str.setParseAction(self.expr_str_const_parse_action)

    # Null constant
    expr_null = kw_null
    expr_null.setParseAction(self.expr_null_parse_action)

    # Group all constants
    expr_constant = \
        expr_str \
        ^ expr_number \
        ^ expr_null

    # A 'class' expression (class + classname)
    expr_class = kw_class + QuotedString(quoteChar='"')
    expr_class.setParseAction(self.expr_class_parse_action)

    # A local variable expression
    expr_local = id_local
    expr_local.setParseAction(self.expr_local_parse_action)

    # Group together all "immediate" values
    expr_imm = expr_local ^ expr_constant ^ expr_class
    expr_imm.setParseAction(self.expr_imm_parse_action)

    # Conditional expression
    expr_cond = expr_imm + cond_op + expr_imm
    expr_cond.setParseAction(self.expr_cond_parse_action)

    # Array index: "[imm]"; empty form used by newmultiarray.
    array_idx = lit_lsb + expr_imm + lit_rsb
    empty_array_idx = lit_lsb + lit_rsb

    expr_binop = expr_imm + binop + expr_imm
    expr_binop.setParseAction(self.expr_binop_parse_action)

    expr_cast = lit_lp + id_type + lit_rp + expr_imm
    expr_cast.setParseAction(self.expr_cast_parse_action)

    expr_instanceof = expr_imm + kw_instanceof + id_type
    expr_instanceof.setParseAction(self.expr_instanceof_parse_action)

    expr_new = Suppress(kw_new) + id_type
    expr_new.setParseAction(self.expr_new_parse_action)

    expr_newarray = kw_newarray + lit_lp + id_type + lit_rp + array_idx
    expr_newarray.setParseAction(self.expr_newarray_parse_action)

    expr_newmultiarray = (kw_newmultiarray + lit_lp + id_type + lit_rp
                          + OneOrMore(array_idx | empty_array_idx))
    expr_newmultiarray.setParseAction(self.expr_newmultiarray_parse_action)

    expr_lengthof = kw_length + expr_imm
    expr_lengthof.setParseAction(self.expr_lengthof_parse_action)

    expr_neg = kw_neg + expr_imm
    expr_neg.setParseAction(self.expr_neg_parse_action)

    # --- Invoke expressions ----------------------------------------------
    # Instance invokes take a receiver local; staticinvoke does not.
    method_arg_list = delimitedList(expr_imm, delim=",")
    expr_invoke = \
        (kw_specialinvoke
         + id_local + lit_dot + id_method
         + lit_lp + Group(Optional(method_arg_list)) + lit_rp) \
        | (kw_interfaceinvoke
           + id_local + lit_dot + id_method
           + lit_lp + Group(Optional(method_arg_list)) + lit_rp) \
        | (kw_virtualinvoke
           + id_local + lit_dot + id_method
           + lit_lp + Group(Optional(method_arg_list)) + lit_rp) \
        | (kw_staticinvoke + id_method
           + lit_lp + Group(Optional(method_arg_list)) + lit_rp)
    expr_invoke.setParseAction(self.expr_invoke_parse_action)

    expr = \
        expr_binop \
        ^ expr_cast \
        ^ expr_instanceof \
        ^ expr_invoke \
        ^ expr_new \
        ^ expr_newarray \
        ^ expr_newmultiarray \
        ^ expr_lengthof \
        ^ expr_neg
    expr.setParseAction(self.expr_parse_action)

    # Concrete reference expressions: field and array element access.
    expr_field_ref = Group(Optional(id_local + lit_dot)) + field_specifier
    expr_field_ref.setParseAction(self.expr_field_ref_parse_action)

    expr_array_ref = id_local + array_idx
    expr_array_ref.setParseAction(self.expr_array_ref_parse_action)

    # L and R values
    expr_lvalue = \
        id_local \
        ^ expr_field_ref \
        ^ expr_array_ref
    expr_lvalue.setParseAction(self.expr_lvalue_parse_action)

    expr_rvalue = \
        expr \
        ^ expr_field_ref \
        ^ expr_array_ref \
        ^ expr_imm

    # Declaration: "Type a, b, c;"
    stmt_decl = \
        id_type \
        + Group(delimitedList(id_local, delim=",")) \
        + lit_strm
    stmt_decl.setParseAction(self.stmt_decl_parse_action)

    # --- Statements ------------------------------------------------------
    stmt_assign = \
        expr_lvalue + lit_asgn + expr_rvalue + lit_strm
    stmt_assign.setParseAction(self.stmt_assign_parse_action)

    # Identity statements bind locals to @this / @parameterN /
    # @caughtexception via ":=".
    stmt_identity = \
        (id_local + lit_ident + kw_this + lit_cln + id_type + lit_strm) \
        ^ (id_local + lit_ident + id_parameter + lit_cln + id_type + lit_strm) \
        ^ (id_local + lit_ident + kw_caughtexception + lit_strm)
    stmt_identity.setParseAction(self.stmt_identity_parse_action)

    stmt_goto = kw_goto + id_label + lit_strm
    stmt_goto.setParseAction(self.stmt_goto_parse_action)

    stmt_if = (Suppress(kw_if) + expr_cond + Suppress(kw_goto)
               + id_label + lit_strm)
    stmt_if.setParseAction(self.stmt_if_parse_action)

    stmt_invoke = expr_invoke + lit_strm
    stmt_invoke.setParseAction(self.stmt_invoke_parse_action)

    # lookupswitch(imm) { case n: goto label; ... default: goto label; };
    switch_case = (kw_case + expr_number + lit_cln + kw_goto
                   + id_label + lit_strm)
    switch_default = kw_default + lit_cln + kw_goto + id_label + lit_strm
    switch_body = ZeroOrMore(switch_case) + Optional(switch_default)
    stmt_switch = (kw_lookupswitch + lit_lp + expr_imm + lit_rp
                   + lit_lcb + switch_body + lit_rcb + lit_strm)
    stmt_switch.setParseAction(self.stmt_switch_parse_action)

    stmt_enter_monitor = kw_entermonitor + expr_imm + lit_strm
    stmt_enter_monitor.setParseAction(self.stmt_enter_monitor_parse_action)

    stmt_exit_monitor = kw_exitmonitor + expr_imm + lit_strm
    stmt_exit_monitor.setParseAction(self.stmt_exit_monitor_parse_action)

    # "return imm;" or bare "return;"
    stmt_return = (Suppress(kw_return) + expr_imm + lit_strm
                   | Suppress(kw_return) + lit_strm)
    stmt_return.setParseAction(self.stmt_return_parse_action)

    stmt_throw = kw_throw + expr_imm + lit_strm
    stmt_throw.setParseAction(self.stmt_throw_parse_action)

    stmt_catch = kw_catch + id_type \
        + kw_from + id_label \
        + kw_to + id_label \
        + kw_with + id_label + lit_strm
    stmt_catch.setParseAction(self.stmt_catch_parse_action)

    stmt_breakpoint = kw_breakpoint + lit_strm
    stmt_breakpoint.setParseAction(self.stmt_breakpoint_parse_action)

    stmt_nop = kw_nop + lit_strm
    stmt_nop.setParseAction(self.stmt_nop_parse_action)

    jimple_stmt = \
        stmt_decl \
        ^ stmt_assign \
        ^ stmt_identity \
        ^ stmt_goto \
        ^ stmt_if \
        ^ stmt_invoke \
        ^ stmt_switch \
        ^ stmt_enter_monitor \
        ^ stmt_exit_monitor \
        ^ stmt_return \
        ^ stmt_throw \
        ^ stmt_catch \
        ^ stmt_breakpoint \
        ^ stmt_nop
    jimple_stmt.setParseAction(self.stmt_parse_action)

    # --- Declarations and class structure --------------------------------
    throws_clause = kw_throws + delimitedList(id_type, delim=",")

    method_sig = \
        Group(ZeroOrMore(modifier)) \
        + id_type + id_method_name \
        + lit_lp + Group(Optional(method_param_list)) + lit_rp \
        + Group(Optional(throws_clause))
    # Abstract/native methods: a signature followed by ';' with no body.
    method_decl = method_sig + lit_strm

    field_decl = ZeroOrMore(modifier) + id_type + id_java + lit_strm
    field_decl.setParseAction(self.field_decl_parse_action)

    class_decl = \
        Group(ZeroOrMore(modifier)) + Suppress(kw_class) + id_type \
        + Optional(kw_extends + delimitedList(id_type, delim=",")) \
        + Optional(kw_implements + delimitedList(id_type, delim=","))
    interface_decl = \
        Group(ZeroOrMore(modifier)) + Suppress(kw_interface) + id_type \
        + Optional(kw_extends + delimitedList(id_type, delim=",")) \
        + Optional(kw_implements + delimitedList(id_type, delim=","))

    # A method body item is a statement or a jump label ("labelN:").
    jimple_method_item = \
        jimple_stmt \
        | Combine(id_label + lit_cln).setParseAction(self.label_parse_action)
    jimple_method_body = ZeroOrMore(jimple_method_item)
    jimple_method = \
        Group(method_sig) + lit_lcb \
        + Group(jimple_method_body) \
        + lit_rcb
    jimple_method.setParseAction(self.method_defn_parse_action)

    jimple_class_item = field_decl | method_decl | jimple_method
    jimple_class_body = ZeroOrMore(jimple_class_item)
    jimple_class = (Group(class_decl | interface_decl) + lit_lcb
                    + Group(jimple_class_body) + lit_rcb)
    jimple_class.setParseAction(self.class_defn_parse_action)

    return jimple_class
_word_function = Literal("extend") + Suppress("(") + _basic_expr + "," + _basic_expr + Suppress(")") | Literal( "resize" ) + Suppress("(") + _basic_expr + "," + _basic_expr + Suppress(")") _word_function.setParseAction(lambda s, l, t: WordFunction(t[0], t[1], t[2])) _count = Literal("count") + Suppress("(") + delimitedList(_basic_expr) + Suppress(")") _count.setParseAction(lambda s, l, t: Count(t[1])) _next = Literal("next") + Suppress("(") + _basic_expr + Suppress(")") _next.setParseAction(lambda s, l, t: Next(t[1])) _case_case = _basic_expr + Suppress(":") + _basic_expr + Suppress(";") _case_body = OneOrMore(_case_case) _case_body.setParseAction(lambda s, l, t: OrderedDict(zip(t[::2], t[1::2]))) _case = Suppress("case") + _case_body + Suppress("esac") _case.setParseAction(lambda s, l, t: Case(t[0])) _base = complex_identifier ^ ( _conversion | _word_function | _count | _next | Suppress("(") + _basic_expr + Suppress(")") | _case | constant ) _ap = Forward() _array_subscript = Group(Suppress("[") + _basic_expr + Suppress("]")) _word_bit_selection = Group(Suppress("[") + _basic_expr + Suppress(":") + _basic_expr + Suppress("]")) _ap <<= Optional(_array_subscript + _ap | _word_bit_selection + _ap) _array = _base + _ap def _handle_array(tokens):
def _build_asn1_grammar():
    """Construct and return the pyparsing grammar for ASN.1 module definitions.

    The returned element matches one or more ``ModuleDefinition``s; every
    significant sub-production is tagged via ``annotate`` so the parse
    result is a tree of ``AnnotatedToken`` nodes.  Numeric section
    references in the comments (e.g. "18.1") refer to the ASN.1
    specification — presumably ITU-T X.680 (TODO confirm).

    NOTE(review): ``AnnotatedToken``, ``StringOf`` and ``Unique`` are
    helpers defined elsewhere in this module and are not pyparsing
    built-ins.
    """
    def build_identifier(prefix_pattern):
        # One character drawn from prefix_pattern, then an optional run of
        # letters, digits and hyphens.
        identifier_suffix = Optional(Word(srange('[-0-9a-zA-Z]')))
        # todo: more rigorous? trailing hyphens and -- forbidden
        return Combine(Word(srange(prefix_pattern), exact=1) + identifier_suffix)

    def braced_list(element_rule):
        # "{ a, b, c }" -> a Group of the (possibly empty) element list.
        elements_rule = Optional(delimitedList(element_rule))
        return Suppress('{') + Group(elements_rule) + Suppress('}')

    def annotate(name):
        # Parse-action factory: wrap the matched tokens in an
        # AnnotatedToken carrying the given tag name.
        def annotation(t):
            return AnnotatedToken(name, t.asList())

        return annotation

    # Reserved words
    ANY = Keyword('ANY')
    DEFINED_BY = Keyword('DEFINED BY')
    DEFINITIONS = Keyword('DEFINITIONS')
    BEGIN = Keyword('BEGIN')
    END = Keyword('END')
    OPTIONAL = Keyword('OPTIONAL')
    DEFAULT = Keyword('DEFAULT')
    TRUE = Keyword('TRUE')
    FALSE = Keyword('FALSE')
    UNIVERSAL = Keyword('UNIVERSAL')
    APPLICATION = Keyword('APPLICATION')
    PRIVATE = Keyword('PRIVATE')
    MIN = Keyword('MIN')
    MAX = Keyword('MAX')
    IMPLICIT = Keyword('IMPLICIT')
    EXPLICIT = Keyword('EXPLICIT')
    # NOTE(review): multi-word Keywords ("EXPLICIT TAGS" etc.) only match
    # when the words are separated by exactly one space in the input.
    EXPLICIT_TAGS = Keyword('EXPLICIT TAGS')
    IMPLICIT_TAGS = Keyword('IMPLICIT TAGS')
    AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS')
    EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')
    COMPONENTS_OF = Keyword('COMPONENTS OF')
    ELLIPSIS = Keyword('...')
    SIZE = Keyword('SIZE')
    OF = Keyword('OF')
    IMPORTS = Keyword('IMPORTS')
    EXPORTS = Keyword('EXPORTS')
    FROM = Keyword('FROM')

    # Built-in types
    SEQUENCE = Keyword('SEQUENCE')
    SET = Keyword('SET')
    CHOICE = Keyword('CHOICE')
    ENUMERATED = Keyword('ENUMERATED')
    BIT_STRING = Keyword('BIT STRING')
    BOOLEAN = Keyword('BOOLEAN')
    REAL = Keyword('REAL')
    OCTET_STRING = Keyword('OCTET STRING')
    CHARACTER_STRING = Keyword('CHARACTER STRING')
    NULL = Keyword('NULL')
    INTEGER = Keyword('INTEGER')
    OBJECT_IDENTIFIER = Keyword('OBJECT IDENTIFIER')

    # Restricted string types
    BMPString = Keyword('BMPString')
    GeneralString = Keyword('GeneralString')
    GraphicString = Keyword('GraphicString')
    IA5String = Keyword('IA5String')
    ISO646String = Keyword('ISO646String')
    NumericString = Keyword('NumericString')
    PrintableString = Keyword('PrintableString')
    TeletexString = Keyword('TeletexString')
    T61String = Keyword('T61String')
    UniversalString = Keyword('UniversalString')
    UTF8String = Keyword('UTF8String')
    VideotexString = Keyword('VideotexString')
    VisibleString = Keyword('VisibleString')

    # Useful types
    GeneralizedTime = Keyword('GeneralizedTime')
    UTCTime = Keyword('UTCTime')
    ObjectDescriptor = Keyword('ObjectDescriptor')

    # Literals
    number = Word(nums)
    signed_number = Combine(Optional('-') + number)  # todo: consider defined values from 18.1
    # 'xxxx'B / 'xxxx'H binary and hex string literals.
    bstring = Suppress('\'') + StringOf('01') + Suppress('\'B')
    hstring = Suppress('\'') + StringOf('0123456789ABCDEF') + Suppress('\'H')

    # Comments: "-- ... --" (or to end of input) plus C-style /* ... */.
    hyphen_comment = Regex(r"--[\s\S]*?(--|$)", flags=re.MULTILINE)
    comment = hyphen_comment | cStyleComment

    # identifier
    identifier = build_identifier('[a-z]')

    # references
    # these are duplicated to force unique token annotations
    valuereference = build_identifier('[a-z]')
    typereference = build_identifier('[A-Z]')
    module_reference = build_identifier('[A-Z]')
    reference = valuereference | typereference  # TODO: consider object references from 12.1

    # values
    # todo: consider more literals from 16.9
    boolean_value = TRUE | FALSE
    bitstring_value = bstring | hstring  # todo: consider more forms from 21.9
    integer_value = signed_number
    null_value = NULL
    cstring_value = dblQuotedString

    exponent = CaselessLiteral('e') + signed_number
    real_value = Combine(signed_number + Optional(Literal('.') + Optional(number)) + Optional(exponent))

    # In value range constraints, decimal points must be followed by number, or
    # the grammar becomes ambiguous: ([1.].100) vs ([1]..[100])
    constraint_real_value = Combine(signed_number + Optional(Literal('.') + number) + Optional(exponent))

    builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value | cstring_value
    external_value_reference = module_reference + Suppress('.') + valuereference
    defined_value = external_value_reference | valuereference  # todo: more options from 13.1
    referenced_value = Unique(defined_value)  # todo: more options from 16.11

    # object identifier value
    name_form = Unique(identifier)
    number_form = Unique(number)
    name_and_number_form = name_form + Suppress('(') + number_form + Suppress(')')
    objid_components = name_and_number_form | name_form | number_form | defined_value
    objid_components_list = OneOrMore(objid_components)
    object_identifier_value = Suppress('{') + \
                              (objid_components_list | (defined_value + objid_components_list)) + \
                              Suppress('}')

    value = builtin_value | referenced_value | object_identifier_value

    # definitive identifier value
    definitive_number_form = Unique(number)
    definitive_name_and_number_form = name_form + Suppress('(') + definitive_number_form + Suppress(')')
    definitive_objid_component = definitive_name_and_number_form | name_form | definitive_number_form
    definitive_objid_component_list = OneOrMore(definitive_objid_component)
    definitive_identifier = Optional(Suppress('{') + definitive_objid_component_list + Suppress('}'))

    # tags
    class_ = UNIVERSAL | APPLICATION | PRIVATE
    class_number = Unique(number)  # todo: consider defined values from 30.1
    tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
    tag_default = EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS

    # extensions
    extension_default = Unique(EXTENSIBILITY_IMPLIED)

    # values

    # Forward-declare these, they can only be fully defined once
    # we have all types defined. There are some circular dependencies.
    named_type = Forward()
    type_ = Forward()

    # constraints
    # todo: consider the full subtype and general constraint syntax described in 45.*
    lower_bound = (constraint_real_value | signed_number | referenced_value | MIN)
    upper_bound = (constraint_real_value | signed_number | referenced_value | MAX)
    single_value_constraint = Suppress('(') + value + Suppress(')')
    value_range_constraint = Suppress('(') + lower_bound + Suppress('..') + upper_bound + Suppress(')')
    # TODO: Include contained subtype constraint here if we ever implement it.
    # The parentheses around a SIZE constraint are optional here, so both
    # "SIZE (n)" and "(SIZE (n))" forms are accepted.
    size_constraint = Optional(Suppress('(')) + Suppress(SIZE) + (single_value_constraint | value_range_constraint) + Optional(Suppress(')'))

    # types
    # todo: consider other defined types from 13.1
    defined_type = Optional(module_reference + Suppress('.'), default=None) + typereference + Optional(size_constraint, default=None)

    # TODO: consider exception syntax from 24.1
    extension_marker = Unique(ELLIPSIS)

    component_type_optional = named_type + Suppress(OPTIONAL)
    component_type_default = named_type + Suppress(DEFAULT) + value
    component_type_components_of = Suppress(COMPONENTS_OF) + type_
    component_type = component_type_components_of | component_type_optional | component_type_default | named_type

    tagged_type = tag + Optional(IMPLICIT | EXPLICIT, default=None) + type_

    named_number_value = Suppress('(') + signed_number + Suppress(')')
    named_number = identifier + named_number_value
    named_nonumber = Unique(identifier)
    enumeration = named_number | named_nonumber

    set_type = SET + braced_list(component_type | extension_marker)
    sequence_type = SEQUENCE + braced_list(component_type | extension_marker)
    sequenceof_type = Suppress(SEQUENCE) + Optional(size_constraint, default=None) + Suppress(OF) + (type_ | named_type)
    setof_type = Suppress(SET) + Optional(size_constraint, default=None) + Suppress(OF) + (type_ | named_type)
    choice_type = CHOICE + braced_list(named_type | extension_marker)
    selection_type = identifier + Suppress('<') + type_
    enumerated_type = ENUMERATED + braced_list(enumeration | extension_marker)
    bitstring_type = BIT_STRING + Optional(braced_list(named_number), default=[]) + Optional(single_value_constraint | size_constraint, default=None)
    plain_integer_type = INTEGER
    restricted_integer_type = INTEGER + braced_list(named_number) + Optional(single_value_constraint, default=None)
    boolean_type = BOOLEAN
    real_type = REAL
    null_type = NULL
    object_identifier_type = OBJECT_IDENTIFIER
    octetstring_type = OCTET_STRING + Optional(size_constraint)
    unrestricted_characterstring_type = CHARACTER_STRING
    restricted_characterstring_type = BMPString | GeneralString | \
                                      GraphicString | IA5String | \
                                      ISO646String | NumericString | \
                                      PrintableString | TeletexString | \
                                      T61String | UniversalString | \
                                      UTF8String | VideotexString | \
                                      VisibleString
    characterstring_type = (restricted_characterstring_type | unrestricted_characterstring_type) + Optional(size_constraint)
    useful_type = GeneralizedTime | UTCTime | ObjectDescriptor

    # ANY type
    any_type = ANY + Optional(Suppress(DEFINED_BY + identifier))

    # todo: consider other builtins from 16.2
    simple_type = (any_type | boolean_type | null_type | octetstring_type | characterstring_type | real_type | plain_integer_type | object_identifier_type | useful_type) + Optional(value_range_constraint | single_value_constraint)
    constructed_type = choice_type | sequence_type | set_type
    value_list_type = restricted_integer_type | enumerated_type
    builtin_type = value_list_type | tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | bitstring_type
    referenced_type = defined_type | selection_type  # todo: consider other ref:d types from 16.3

    # Resolve the forward declarations now that all alternatives exist.
    type_ << (builtin_type | referenced_type)
    named_type << (identifier + type_)

    type_assignment = typereference + '::=' + type_
    value_assignment = valuereference + type_ + '::=' + value
    assignment = type_assignment | value_assignment
    assignment_list = ZeroOrMore(assignment)

    # TODO: Maybe handle full assigned-identifier syntax with defined values
    # described in 12.1, but I haven't been able to find examples of it, and I
    # can't say for sure what acceptable syntax is.
    global_module_reference = module_reference + Optional(object_identifier_value, default=None)

    symbol = Unique(reference)  # TODO: parameterized reference?
    symbol_list = delimitedList(symbol, delim=',')
    symbols_from_module = Group(Group(symbol_list) + Suppress(FROM) + global_module_reference)
    symbols_from_module_list = OneOrMore(symbols_from_module)
    symbols_imported = Unique(symbols_from_module_list)
    exports = Suppress(EXPORTS) + Optional(symbol_list) + Suppress(';')
    imports = Suppress(IMPORTS) + Optional(symbols_imported) + Suppress(';')

    module_body = Optional(exports, default=None) + Optional(imports, default=None) + assignment_list
    module_identifier = module_reference + definitive_identifier
    module_definition = module_identifier + Suppress(DEFINITIONS) + Optional(tag_default, default=None) + \
                        Optional(extension_default, default=None) + Suppress('::=') + \
                        Suppress(BEGIN) + module_body + Suppress(END)

    # Comments may appear anywhere inside a module definition.
    module_definition.ignore(comment)

    # Mark up the parse results with token tags
    identifier.setParseAction(annotate('Identifier'))
    named_number_value.setParseAction(annotate('Value'))
    tag.setParseAction(annotate('Tag'))
    class_.setParseAction(annotate('TagClass'))
    class_number.setParseAction(annotate('TagClassNumber'))
    type_.setParseAction(annotate('Type'))
    simple_type.setParseAction(annotate('SimpleType'))
    choice_type.setParseAction(annotate('ChoiceType'))
    sequence_type.setParseAction(annotate('SequenceType'))
    set_type.setParseAction(annotate('SetType'))
    value_list_type.setParseAction(annotate('ValueListType'))
    bitstring_type.setParseAction(annotate('BitStringType'))
    sequenceof_type.setParseAction(annotate('SequenceOfType'))
    setof_type.setParseAction(annotate('SetOfType'))
    named_number.setParseAction(annotate('NamedValue'))
    named_nonumber.setParseAction(annotate('NamedValue'))
    single_value_constraint.setParseAction(annotate('SingleValueConstraint'))
    size_constraint.setParseAction(annotate('SizeConstraint'))
    value_range_constraint.setParseAction(annotate('ValueRangeConstraint'))
    component_type.setParseAction(annotate('ComponentType'))
    component_type_optional.setParseAction(annotate('ComponentTypeOptional'))
    component_type_default.setParseAction(annotate('ComponentTypeDefault'))
    component_type_components_of.setParseAction(annotate('ComponentTypeComponentsOf'))
    tagged_type.setParseAction(annotate('TaggedType'))
    named_type.setParseAction(annotate('NamedType'))
    type_assignment.setParseAction(annotate('TypeAssignment'))
    value_assignment.setParseAction(annotate('ValueAssignment'))
    module_reference.setParseAction(annotate('ModuleReference'))
    global_module_reference.setParseAction(annotate('GlobalModuleReference'))
    module_body.setParseAction(annotate('ModuleBody'))
    module_definition.setParseAction(annotate('ModuleDefinition'))
    extension_marker.setParseAction(annotate('ExtensionMarker'))
    name_form.setParseAction(annotate('NameForm'))
    number_form.setParseAction(annotate('NumberForm'))
    name_and_number_form.setParseAction(annotate('NameAndNumberForm'))
    object_identifier_value.setParseAction(annotate('ObjectIdentifierValue'))
    definitive_identifier.setParseAction(annotate('DefinitiveIdentifier'))
    definitive_number_form.setParseAction(annotate('DefinitiveNumberForm'))
    definitive_name_and_number_form.setParseAction(annotate('DefinitiveNameAndNumberForm'))
    exports.setParseAction(annotate('Exports'))
    imports.setParseAction(annotate('Imports'))
    assignment_list.setParseAction(annotate('AssignmentList'))
    bstring.setParseAction(annotate('BinaryStringValue'))
    hstring.setParseAction(annotate('HexStringValue'))
    defined_type.setParseAction(annotate('DefinedType'))
    selection_type.setParseAction(annotate('SelectionType'))
    referenced_value.setParseAction(annotate('ReferencedValue'))

    # The entry point: a file is one or more module definitions.
    start = OneOrMore(module_definition)
    return start
| Literal("resize") + Suppress("(") + _basic_expr + "," + _basic_expr + Suppress(")")) _word_function.setParseAction(lambda s, l, t: WordFunction(t[0], t[1], t[2])) _count = (Literal("count") + Suppress("(") + delimitedList(_basic_expr) + Suppress(")")) _count.setParseAction(lambda s, l, t: Count(t[1])) _next = Literal("next") + Suppress("(") + _basic_expr + Suppress(")") _next.setParseAction(lambda s, l, t: Next(t[1])) _case_case = _basic_expr + Suppress(":") + _basic_expr + Suppress(";") _case_body = OneOrMore(_case_case) _case_body.setParseAction(lambda s, l, t: OrderedDict(zip(t[::2], t[1::2]))) _case = Suppress("case") + _case_body + Suppress("esac") _case.setParseAction(lambda s, l, t: Case(t[0])) _base = (complex_identifier ^ (_conversion | _word_function | _count | _next | Suppress("(") + _basic_expr + Suppress(")") | _case | constant)) _ap = Forward() _array_subscript = Group(Suppress("[") + _basic_expr + Suppress("]")) _word_bit_selection = Group( Suppress("[") + _basic_expr + Suppress(":") + _basic_expr + Suppress("]"))
from .qa import (ApplicationRef, FilePattern, FileGroup, Qa, CommandLine,
                 InputLine)

# "ID <test-id>": the identifier may contain letters, digits, '-' and '_'.
TEST_ID = Suppress("ID") + Word(alphanums + "-_")("id")

# "AP|AA|AQ <application>", optionally followed by "AB <embassy package>".
APPLICATION_REF = (oneOf("AP AA AQ")
                   + Word(alphas)("appname")
                   + Optional(Suppress("AB") + Word(alphas)("embassypack")))


def _get_application_ref(token):
    # The embassy pack is optional, hence .get() with a None default.
    return ApplicationRef(token["appname"], token.get("embassypack", None))


APPLICATION_REF.setParseAction(_get_application_ref)

# "CL <rest of line>": a single command-line entry.
CL_LINE = Suppress("CL ") + restOfLine("line")


def _get_cl_line(token):
    return CommandLine(token["line"])


CL_LINE.setParseAction(_get_cl_line)

CL_LINES = Group(ZeroOrMore(CL_LINE))("cl_lines")


def _get_cl_lines(token):
    return token["cl_lines"]


CL_LINES.setParseAction(_get_cl_lines)

# "IN <rest of line>": a single input (stdin) entry.
IN_LINE = Suppress("IN ") + restOfLine("line")


def _get_in_line(token):
    return InputLine(token["line"])


IN_LINE.setParseAction(_get_in_line)

IN_LINES = Group(ZeroOrMore(IN_LINE))("in_lines")


def _get_in_lines(token):
    return token["in_lines"]


IN_LINES.setParseAction(_get_in_lines)
def __init__(self, fragment_file, sdkconfig):
    """Parse a linker-fragment file against the given sdkconfig.

    fragment_file may be a path or an already-open file object.  The parsed
    fragment objects are stored on self.fragments, each annotated with the
    real path of the file it came from.
    """
    # Accept either a path (open it) or a file-like object (a file object
    # makes open() raise TypeError, which we deliberately ignore).
    try:
        fragment_file = open(fragment_file, "r")
    except TypeError:
        pass

    path = os.path.realpath(fragment_file.name)

    # Indentation stack shared by every indentedBlock() grammar below.
    indent_stack = [1]

    # Mutable parse state shared by the parse-action closures below.
    class parse_ctx:
        fragment = None  # current fragment
        key = ""  # current key
        keys = list()  # list of keys parsed
        key_grammar = None  # current key grammar

        @staticmethod
        def reset():
            # NOTE(review): this assigns 'fragment_instance', while every
            # reader of this class uses 'fragment' — so the current fragment
            # is never actually cleared here.  Looks like it should be
            # 'parse_ctx.fragment = None'; confirm before changing.
            parse_ctx.fragment_instance = None
            parse_ctx.key = ""
            parse_ctx.keys = list()
            parse_ctx.key_grammar = None

    def fragment_type_parse_action(toks):
        # New fragment header seen: reset state and instantiate the fragment
        # class registered for this type name.
        parse_ctx.reset()
        parse_ctx.fragment = FRAGMENT_TYPES[
            toks[0]]()  # create instance of the fragment
        return None

    def expand_conditionals(toks, stmts):
        # Recursively flatten parse results into 'stmts', evaluating
        # if/elif/else conditionals against sdkconfig and keeping only the
        # first branch whose condition evaluates true.
        try:
            stmt = toks["value"]
            stmts.append(stmt)
        except KeyError:
            try:
                conditions = toks["conditional"]
                for condition in conditions:
                    try:
                        _toks = condition[1]
                        _cond = condition[0]
                        if sdkconfig.evaluate_expression(_cond):
                            expand_conditionals(_toks, stmts)
                            break
                    except IndexError:
                        # No condition part: this is the 'else' branch.
                        expand_conditionals(condition[0], stmts)
            except KeyError:
                # Neither a value nor a conditional: recurse into children.
                for tok in toks:
                    expand_conditionals(tok, stmts)

    def key_body_parsed(pstr, loc, toks):
        # Collect the values for the current key and enforce the key
        # grammar's min/max cardinality before storing them on the fragment.
        stmts = list()
        expand_conditionals(toks, stmts)

        if parse_ctx.key_grammar.min and len(
                stmts) < parse_ctx.key_grammar.min:
            raise ParseFatalException(
                pstr, loc,
                "fragment requires at least %d values for key '%s'" %
                (parse_ctx.key_grammar.min, parse_ctx.key))

        if parse_ctx.key_grammar.max and len(
                stmts) > parse_ctx.key_grammar.max:
            raise ParseFatalException(
                pstr, loc,
                "fragment requires at most %d values for key '%s'" %
                (parse_ctx.key_grammar.max, parse_ctx.key))

        try:
            parse_ctx.fragment.set_key_value(parse_ctx.key, stmts)
        except Exception as e:
            # NOTE(review): e.message is Python 2 only; under Python 3 this
            # line itself raises AttributeError instead of reporting.
            raise ParseFatalException(
                pstr, loc,
                "unable to add key '%s'; %s" % (parse_ctx.key, e.message))
        return None

    # "key:" introduces an indented block of values for that key.
    key = Word(alphanums + "_") + Suppress(":")
    key_stmt = Forward()

    condition_block = indentedBlock(key_stmt, indent_stack)
    key_stmts = OneOrMore(condition_block)
    key_body = Suppress(key) + key_stmts
    key_body.setParseAction(key_body_parsed)

    # Conditional syntax: "if <expr>:" / "elif <expr>:" / "else:" blocks;
    # the condition text is captured verbatim for later evaluation.
    condition = originalTextFor(
        SDKConfig.get_expression_grammar()).setResultsName("condition")
    if_condition = Group(
        Suppress("if") + condition + Suppress(":") + condition_block)
    elif_condition = Group(
        Suppress("elif") + condition + Suppress(":") + condition_block)
    else_condition = Group(
        Suppress("else") + Suppress(":") + condition_block)
    conditional = (if_condition + Optional(OneOrMore(elif_condition)) +
                   Optional(else_condition)).setResultsName("conditional")

    def key_parse_action(pstr, loc, toks):
        # A key name was parsed: reject duplicates, look up its grammar on
        # the current fragment type, and wire key_stmt to accept either that
        # grammar or a conditional wrapping it.
        key = toks[0]

        if key in parse_ctx.keys:
            # NOTE(review): the message interpolates parse_ctx.key (the
            # previously parsed key), not the duplicate just seen — confirm.
            raise ParseFatalException(
                pstr, loc,
                "duplicate key '%s' value definition" % parse_ctx.key)

        parse_ctx.key = key
        parse_ctx.keys.append(key)

        try:
            parse_ctx.key_grammar = parse_ctx.fragment.get_key_grammars(
            )[key]
            key_grammar = parse_ctx.key_grammar.grammar
        except KeyError:
            raise ParseFatalException(
                pstr, loc,
                "key '%s' is not supported by fragment" % key)
        except Exception as e:
            # NOTE(review): e.message is Python 2 only (see above).
            raise ParseFatalException(
                pstr, loc,
                "unable to parse key '%s'; %s" % (key, e.message))

        key_stmt << (conditional
                     | Group(key_grammar).setResultsName("value"))
        return None

    def name_parse_action(pstr, loc, toks):
        # Record the fragment's name from the "[type:name]" header.
        parse_ctx.fragment.name = toks[0]

    key.setParseAction(key_parse_action)

    ftype = Word(alphas).setParseAction(fragment_type_parse_action)
    fid = Suppress(":") + Word(alphanums + "_.").setResultsName("name")
    fid.setParseAction(name_parse_action)
    header = Suppress("[") + ftype + fid + Suppress("]")

    def fragment_parse_action(pstr, loc, toks):
        # Whole fragment parsed: verify every key the fragment type marks
        # as required was actually present.
        key_grammars = parse_ctx.fragment.get_key_grammars()
        required_keys = set(
            [k for (k, v) in key_grammars.items() if v.required])
        present_keys = required_keys.intersection(set(parse_ctx.keys))
        if present_keys != required_keys:
            raise ParseFatalException(
                pstr, loc, "required keys %s for fragment not found" %
                list(required_keys - present_keys))
        return parse_ctx.fragment

    fragment_stmt = Forward()
    fragment_block = indentedBlock(fragment_stmt, indent_stack)

    fragment_if_condition = Group(
        Suppress("if") + condition + Suppress(":") + fragment_block)
    fragment_elif_condition = Group(
        Suppress("elif") + condition + Suppress(":") + fragment_block)
    fragment_else_condition = Group(
        Suppress("else") + Suppress(":") + fragment_block)
    fragment_conditional = (
        fragment_if_condition + Optional(OneOrMore(fragment_elif_condition)) +
        Optional(fragment_else_condition)).setResultsName("conditional")

    # A fragment is its header plus one or more indented key bodies.
    fragment = (header + OneOrMore(indentedBlock(
        key_body, indent_stack, False))).setResultsName("value")
    fragment.setParseAction(fragment_parse_action)
    fragment.ignore("#" + restOfLine)  # '#' starts a comment

    # Legacy mapping grammar kept for backward compatibility.
    deprecated_mapping = DeprecatedMapping.get_fragment_grammar(
        sdkconfig, fragment_file.name).setResultsName("value")

    fragment_stmt << (Group(deprecated_mapping) | Group(fragment)
                      | Group(fragment_conditional))

    def fragment_stmt_parsed(pstr, loc, toks):
        # Resolve top-level conditionals into the flat list of fragments.
        stmts = list()
        expand_conditionals(toks, stmts)
        return stmts

    parser = ZeroOrMore(fragment_stmt)
    parser.setParseAction(fragment_stmt_parsed)

    self.fragments = parser.parseFile(fragment_file, parseAll=True)

    # Remember where each fragment was defined.
    for fragment in self.fragments:
        fragment.path = path
r = vb_str.get_ms_ascii_value(c_str) # Return the result. if (log.getEffectiveLevel() == logging.DEBUG): log.debug("Asc: return %r" % r) return r def __repr__(self): return 'Asc(%s)' % repr(self.arg) # Asc() # TODO: see MS-VBAL 6.1.2.11.1.1 page 240 => AscB, AscW asc = Suppress((CaselessKeyword('Asc') | CaselessKeyword('AscW') )) + Optional(Suppress('(') + expression + Suppress(')')) asc.setParseAction(Asc) # --- StrReverse() -------------------------------------------------------------------- class StrReverse(VBA_Object): """Emulator for VBA StrReverse function. """ def __init__(self, original_str, location, tokens): super(StrReverse, self).__init__(original_str, location, tokens) # extract argument from the tokens: # Here the arg is expected to be either a string or a VBA_Object self.arg = tokens[0] def return_type(self):
def _numeric_literal_action(datatype):
    """Parse-action factory: wrap the matched lexeme in a typed Literal."""
    return lambda toks: rdflib.Literal(toks[0], datatype=datatype)


# [147] DECIMAL ::= [0-9]* '.' [0-9]+
DECIMAL = Regex(r'[0-9]*\.[0-9]+')
DECIMAL.setParseAction(_numeric_literal_action(rdflib.XSD.decimal))

# [148] DOUBLE ::= [0-9]+ '.' [0-9]* EXPONENT
#                | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT
DOUBLE = Regex(r'[0-9]+\.[0-9]*%(e)s|\.([0-9])+%(e)s|[0-9]+%(e)s'
               % {'e': EXPONENT_re})
DOUBLE.setParseAction(_numeric_literal_action(rdflib.XSD.double))

# [149] INTEGER_POSITIVE ::= '+' INTEGER
INTEGER_POSITIVE = Suppress('+') + INTEGER.copy().leaveWhitespace()
INTEGER_POSITIVE.setParseAction(
    lambda toks: rdflib.Literal("+" + toks[0], datatype=rdflib.XSD.integer))

# [150] DECIMAL_POSITIVE ::= '+' DECIMAL
# (the copied DECIMAL keeps its decimal-Literal parse action)
DECIMAL_POSITIVE = Suppress('+') + DECIMAL.copy().leaveWhitespace()

# [151] DOUBLE_POSITIVE ::= '+' DOUBLE
DOUBLE_POSITIVE = Suppress('+') + DOUBLE.copy().leaveWhitespace()

# [152] INTEGER_NEGATIVE ::= '-' INTEGER
INTEGER_NEGATIVE = Suppress('-') + INTEGER.copy().leaveWhitespace()
INTEGER_NEGATIVE.setParseAction(lambda toks: neg(toks[0]))

# [153] DECIMAL_NEGATIVE ::= '-' DECIMAL
DECIMAL_NEGATIVE = Suppress('-') + DECIMAL.copy().leaveWhitespace()
DECIMAL_NEGATIVE.setParseAction(lambda toks: neg(toks[0]))
# Tokens below treat '\' (backslash) and '$' specially; only newline and tab
# are skippable whitespace so in-line spaces stay part of the text runs.
ParserElement.setDefaultWhitespaceChars("\n\t")
backslash = chr(92)  # '\', spelled via chr() to avoid escaping

texcmd = Forward()
# Plain text: anything up to the next command or math delimiter.
filler = CharsNotIn(backslash + '$')
# Same, but also stopping at braces (used inside {...} parameters).
filler2 = CharsNotIn(backslash + '$' + '{}')

# Optional argument in square brackets: [ ... ]
arg = '[' + CharsNotIn("]") + ']'
arg.setParseAction(argfun)

# $...$ math, kept verbatim (may span lines).
dollarmath = QuotedString('$', multiline=True, unquoteResults=False)

# Mandatory {...} parameter: math, text, nested braces or nested commands.
param = Suppress(Literal('{')) + ZeroOrMoreAsList(
    dollarmath | filler2 |
    QuotedString('{', endQuoteChar='}', unquoteResults=False) |
    texcmd) + Suppress(Literal('}'))
param.setParseAction(paramfun)

def bs(c):
    # A backslash-escaped single character, e.g. '\&'.
    return Literal("\\" + c)

# Single-character escape commands.
singles = bs("[") | bs("]") | bs("{") | bs("}") | bs("\\") | bs("&") | bs(
    "_") | bs(",") | bs("#") | bs("\n") | bs(";") | bs("|") | bs("%") | bs(
        "*") | bs("~") | bs("^")

# A command: an escape, or '\name' (min=2 chars including the backslash),
# followed by any number of [args] and {params}.
texcmd << (singles | Word(
    "\\",
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",
    min=2)) + ZeroOrMoreAsList(arg) + ZeroOrMoreAsList(param)
# Keyword tokens of the protobuf grammar.
RETURNS = Keyword('returns')
SERVICE = Keyword('service')
OPTION = Keyword('option')
ENUM = Keyword('enum')
ONEOF = Keyword('oneof')
REQUIRED = Keyword('required')
OPTIONAL = Keyword('optional')
REPEATED = Keyword('repeated')
TRUE = Keyword('true')
FALSE = Keyword('false')

# Forward-declared: a message body may itself contain message definitions.
message_body = Forward()

# message <id> { <body> } — the '-' after MESSAGE makes later mismatches
# fatal rather than backtracking.
message_definition = Suppress(MESSAGE) - identifier("message_id") + \
    Suppress(LBRACE) + message_body("message_body") + Suppress(RBRACE)
message_definition.setParseAction(message_definition_fn)

# enum <id> { NAME = value; ... }
enum_definition = ENUM - identifier + LBRACE + \
    ZeroOrMore(Group(identifier + EQ + integer + SEMI)) + RBRACE

# Scalar type keywords.
DOUBLE = Keyword("double")
INT32 = Keyword("int32")
UINT32 = Keyword("uint32")
BOOL = Keyword("bool")
STRING = Keyword("string")

# NOTE(review): INT32 is defined above but missing from this alternation,
# so "int32" falls through to the generic 'identifier' branch — confirm
# whether that is intended.
type_ = (DOUBLE | UINT32 | BOOL | STRING | identifier)
type_.setParseAction(type_fn)

# required / optional / repeated qualifier.
qualifier = (REQUIRED | OPTIONAL | REPEATED)("qualifier")
qualifier.setParseAction(qualifier_fn)

# <qualifier> <type> <name> = <number>;
field = qualifier - type_("type_") + identifier("identifier") + EQ + \
    integer("field_number") + SEMI
field.setParseAction(field_fn)
# Comments run from '#' to end of line and are ignored everywhere.
comment = '#' + restOfLine
musicobject.ignore(comment)

#fraction = Regex(r'(\d*[./]?\d*)')
# A number: digits with '.' or '/' — evaluated to a float (so "3/4" works).
number = Regex(r'[\d./]+')
# NOTE(review): eval() of matched text — acceptable only for trusted local
# input files.
number.setParseAction(lambda s, l, t: [float(eval(t[0]))])

# Note names a-g ('_' for rest), optional octave digit, optional '#'/'-'.
frequency_symbol = Regex(r'[abcdefg_]\d?[#-]?')
frequency_number = number
frequency = frequency_number ^ frequency_symbol
duration = number

# A tone: a bare frequency, or "(frequency, duration)".
tone = frequency ^ (Suppress('(') + frequency + Suppress(',') + duration +
                    Suppress(')'))
tone.setParseAction(lambda s, l, t: Tone(*t))

# "{a, b, ...}" — a group of comma-separated runs of music objects.
group = Suppress('{') + delimitedList(Grp(OneOrMore(musicobject)),
                                      ',') + Suppress('}')
group.setParseAction(lambda s, l, t: Group(t))

# "tone * object" — a music object transformed by a tone.
transformed = tone + '*' + musicobject
transformed.setParseAction(lambda s, l, t: Transformed(t[0], t[2]))

musicobject << (tone ^ group ^ transformed)

# Script entry: parse the example file, emit MusicXML and a text dump.
result = musicobject.parseFile('example.music')
print(result[0])

from to_music21 import construct_music21
construct_music21(result[0]).write('musicxml', 'foo.xml')
construct_music21(result[0]).show('text')