def _define_context_component(self, cluster, base_feature_set):
    """Return a parser for a context component.

    A context component is any mix of clusters and base feature sets
    surrounding exactly one source placeholder.
    """
    placeholder = Literal(SOURCE_PLACEHOLDER)
    placeholder.setParseAction(self._handle_placeholder)
    surrounding = ZeroOrMore(cluster ^ base_feature_set)
    context_component = Group(
        surrounding + placeholder + ZeroOrMore(cluster ^ base_feature_set)
    ).setResultsName('context_component')
    context_component.setParseAction(self._handle_context_component)
    return context_component
def parser():
    """Return the lazily-built module-level regex grammar parser.

    The grammar is constructed once and cached in the module global
    ``_parser``; subsequent calls return the cached instance.
    """
    global _parser
    if _parser is None:
        ParserElement.setDefaultWhitespaceChars("")
        lbrack, rbrack, lbrace, rbrace, lparen, rparen = map(Literal, "[]{}()")

        reMacro = Combine("\\" + oneOf(list("dws")))
        escapedChar = ~reMacro + Combine("\\" + oneOf(list(printables)))
        # Characters that may appear literally, i.e. everything that is not
        # a regex metacharacter, plus space and tab.
        reLiteralChar = "".join(
            c for c in printables if c not in r"\[]{}().*?+|") + " \t"

        reRange = Combine(lbrack + SkipTo(rbrack, ignore=escapedChar) + rbrack)
        reLiteral = (escapedChar | oneOf(list(reLiteralChar)))
        reDot = Literal(".")
        repetition = (
            (lbrace + Word(nums).setResultsName("count") + rbrace)
            | (lbrace + Word(nums).setResultsName("minCount") + ","
               + Word(nums).setResultsName("maxCount") + rbrace)
            | oneOf(list("*+?"))
        )

        reRange.setParseAction(handle_range)
        reLiteral.setParseAction(handle_literal)
        reMacro.setParseAction(handle_macro)
        reDot.setParseAction(handle_dot)

        reTerm = (reLiteral | reRange | reMacro | reDot)
        reExpr = operatorPrecedence(reTerm, [
            (repetition, 1, opAssoc.LEFT, handle_repetition),
            (None, 2, opAssoc.LEFT, handle_sequence),
            (Suppress('|'), 2, opAssoc.LEFT, handle_alternative),
        ])
        _parser = reExpr
    return _parser
def test_disable_pyparsing_arity_trimming_works():
    """Tests that arity trimming has been disabled and parse actions with
    the wrong number of arguments will raise TypeErrors"""
    # Valid parse-action arities are 0, 1 or 3 args; these are all wrong.
    wrong_arity = [lambda a: None, lambda a, b: None, lambda a, b, c, d: None]
    for action in wrong_arity:
        element = Literal('test').setParseAction(action)
        with raises(TypeError):
            element.parseString('test')
def _define_valued_characters_section(self, heading, characters, parse_action, character_type):
    """Returns a parser object for a section specifying characters and
    their valued features.

    :param heading: section heading
    :type heading: `str`
    :param characters: valid characters
    :type characters: `list` of `str`
    :param parse_action: parse action for a character
    :type parse_action: `function`
    :param character_type: type of characters being described
    :type character_type: `str`
    """
    heading = Literal('[{}]'.format(heading))
    character = Word(''.join(characters), exact=1).setResultsName('character')
    character.setParseAction(self._handle_character)
    feature = Word(alphas).setResultsName('feature')
    feature.setParseAction(parse_action)
    # A value marker is '+', '-', or the Unicode MINUS SIGN.
    value = Literal('+') ^ Literal('-') ^ Literal('\N{MINUS SIGN}')
    value.setParseAction(self._handle_feature_value)
    feature_value = Group(value + feature)
    feature_values = Group(delimitedList(feature_value))
    character_definition = Dict(
        Group(character + Suppress(':') + feature_values))
    character_definitions = Group(
        OneOrMore(character_definition)).setResultsName(character_type)
    return Suppress(heading) + character_definitions
def _logical_parser(expression):
    """
    Return a new parser parsing logical expressions.

    This parser recognizes the following grammar, with precedence:

    <logical> ::= expression | '~' <logical> | <logical> '&' <logical>
                | <logical> '|' <logical> | <logical> '->' <logical>
                | <logical> '<->' <logical>

    .. note:: The parser uses :mod:`pytlq.ast` module's classes to build
        ASTs.

    .. credit:: Adapted from Simon Busard's parser parsing logical
        expressions on atomics.
    """
    parser = Forward()
    # '~' binds tightest.
    not_strict = Literal('~') + expression
    not_strict.setParseAction(lambda tokens: Not(tokens[1]))
    not_ = (not_strict | expression)
    # '&' — left-associative.
    and_ = not_ + ZeroOrMore(Literal('&') + not_)
    and_.setParseAction(lambda tokens: _left(And, tokens))
    # '|' — left-associative.
    or_ = and_ + ZeroOrMore(Literal('|') + and_)
    or_.setParseAction(lambda tokens: _left(Or, tokens))
    # '->' — right-associative.
    imply = ZeroOrMore(or_ + Literal('->')) + or_
    imply.setParseAction(lambda tokens: _right(Imply, tokens))
    # '<->' — left-associative, loosest binding.
    iff = imply + ZeroOrMore(Literal('<->') + imply)
    iff.setParseAction(lambda tokens: _left(Iff, tokens))
    parser <<= iff
    return parser
def __init__(self, path, text, state=None):
    """Store the SFZ file's location/content and build its grammar."""
    self.path = path
    self.base_path = os.path.dirname(path)
    self.text = text
    self.state = state

    opcode_name = Word(alphanums + '_')
    # A value runs until the next "name=", comment, section tag, or EOL.
    value = Regex(r'.*?(?=\s*(([a-zA-Z0-9_]+=)|//|<[a-z]|$))', re.MULTILINE)
    opcode = locatedExpr(opcode_name) + Literal('=').suppress() + value
    opcode.setParseAction(self.handle_opcode)

    section_name = Literal('<').suppress() + Word(alphas) + Literal('>').suppress()
    section = section_name
    section.setParseAction(self.handle_section)

    include = Literal('#include').suppress() + locatedExpr(QuotedString('"'))
    include.setParseAction(self.handle_include)

    statement = (section ^ opcode ^ include)
    self.sfz_file = ZeroOrMore(statement) + stringEnd
    # '//' comments are ignored anywhere in the file.
    comment = Literal('//') + restOfLine
    self.sfz_file.ignore(comment)
def __init__(self):
    """Build a bash-like command-line grammar.

    Exposes ``self.LEXER`` (the pipeline grammar) and
    ``self.parseException`` (pyparsing's ParseException class).

    Fix vs. original: the bare-word Regex used a non-raw string with the
    invalid escape sequence ``\\|`` (a DeprecationWarning, and a future
    SyntaxError); the pattern is now a raw string with identical meaning.
    """
    from pyparsing import (ParserElement, StringEnd, LineEnd, Literal,
                           pythonStyleComment, ZeroOrMore, Suppress,
                           Optional, Combine, OneOrMore, Regex, oneOf,
                           QuotedString, Group, ParseException)

    ParserElement.setDefaultWhitespaceChars("\t ")

    EOF = StringEnd()
    EOL = ~EOF + LineEnd()  # EOL must not match on EOF

    escape = Literal("\\")
    comment = pythonStyleComment
    junk = ZeroOrMore(comment | EOL).suppress()

    ## word (i.e: single argument string)
    word = Suppress(escape + EOL + Optional(comment)) \
        | Combine(OneOrMore(
            escape.suppress() + Regex(".")
            | QuotedString("'", escChar='\\', multiline=True)
            | QuotedString('"', escChar='\\', multiline=True)
            | Regex(r"[^ \t\r\n\f\v\\$&<>();|'\"`]+")
            | Suppress(escape + EOL)))

    ## redirector (aka bash file redirectors, such as "2>&1" sequences)
    fd_src = Regex("[0-2]").setParseAction(lambda t: int(t[0]))
    fd_dst = Suppress("&") + fd_src
    # "[n]<word" || "[n]<&word" || "[n]<&digit-"
    fd_redir = (Optional(fd_src, 0) + Literal("<")
                | Optional(fd_src, 1) + Literal(">")) \
        + (word | (fd_dst + Optional("-")))
    # "&>word" || ">&word"
    full_redir = (oneOf("&> >&") + word)\
        .setParseAction(lambda t: ("&", ">", t[-1]))
    # "<<<word" || "<<[-]word"
    here_doc = Regex("<<(<|-?)") + word
    # "[n]>>word"
    add_to_file = Optional(fd_src | Literal("&"), 1) + \
        Literal(">>") + word
    # "[n]<>word"
    fd_bind = Optional(fd_src, 0) + Literal("<>") + word

    redirector = (fd_redir | full_redir | here_doc | add_to_file | fd_bind)\
        .setParseAction(lambda token: tuple(token))

    ## single command (args/redir list)
    command = Group(OneOrMore(redirector | word))

    ## logical operators (section splits)
    semicolon = Suppress(";") + junk
    connector = (oneOf("&& || |") + junk) | semicolon

    ## pipeline, aka logical block of interconnected commands
    pipeline = junk + Group(command + ZeroOrMore(connector + command)
                            + Optional(semicolon))

    # define object attributes
    self.LEXER = pipeline.ignore(comment) + EOF
    self.parseException = ParseException
def define_number(self):
    """
    Return the syntax definition for a number in Arabic Numerals.

    Override this method to support numeral systems other than Arabic
    Numerals (0-9).

    Do not override this method just to change the character used to
    separate thousands and decimals: Use :attr:`T_THOUSANDS_SEPARATOR`
    and :attr:`T_DECIMAL_SEPARATOR`, respectively.
    """
    # Parse actions normalizing locale tokens to canonical characters:
    normalize_dot = lambda t: "."
    normalize_plus = lambda t: "+"
    normalize_minus = lambda t: "-"

    positive_sign = Literal(self._grammar.get_token("positive_sign"))
    positive_sign.setParseAction(normalize_plus)
    negative_sign = Literal(self._grammar.get_token("negative_sign"))
    negative_sign.setParseAction(normalize_minus)
    decimal_sep = Literal(self._grammar.get_token("decimal_separator"))
    decimal_sep.setParseAction(normalize_dot)
    thousands_sep = Suppress(self._grammar.get_token("thousands_separator"))
    digits = Word(nums)

    # Building the integers and decimals:
    sign = positive_sign | negative_sign
    # Thousands groups: 1-3 leading digits, then exact triplets.
    thousands = Word(nums, max=3) + \
        OneOrMore(thousands_sep + Word(nums, exact=3))
    integers = thousands | digits
    decimals = decimal_sep + digits

    number = Combine(Optional(sign) + integers + Optional(decimals))
    number.setParseAction(self.make_number)
    number.setName("number")
    return number
def check_function_def_above_main(self, code):
    """Flag DEFINITION_ABOVE_MAIN for a function defined before main().

    Fix vs. original: the regex was a non-raw string containing ``\\s``
    and ``\\w`` escapes (DeprecationWarning); it is now a raw string with
    identical matching behavior.
    """
    prototype = check_if_function_prototype(code)
    function = check_if_function(code)
    # Once "int main" is seen, nothing after it is "above main".
    inside = Literal("int main")
    if len(inside.searchString(code)):
        return
    elif function and not prototype and self.outside_main:
        # Capture "<return-type> <name>" to report the offending name.
        function_regex = re.compile(r"^\s*(\w+)\s+(\w+)")
        match = function_regex.search(code)
        function_name = match.group(2) if match else "NOT_FOUND"
        self.add_error(label="DEFINITION_ABOVE_MAIN",
                       data={'function': function_name})
def check_main_prefix(self, code):
    """Flag MAIN_SYNTAX unless main's parameter list is an accepted form."""
    # Return value for main is optional in C++11
    parser = Literal("main") + Literal("(") + SkipTo(Literal(")")) + Literal(")") + Literal("{")
    if len(parser.searchString(code)):
        main_prefix = Literal("main") + Literal("(")
        full_use = "int" + Word(alphanums) + "," + "char*" + Word(alphanums) + "[" + "]" + ")"
        # 3 options for main() syntax: (), (void), (int ..., char* ...[])
        if not len((main_prefix + Literal(")")).searchString(code)) and \
                not len((main_prefix + Literal("void") + Literal(")")).searchString(code)) and \
                not len((main_prefix + full_use).searchString(code)):
            self.add_error("MAIN_SYNTAX")
def check_unnecessary_include(self, code):
    """Flag UNNECESSARY_INCLUDE for a <...> include not in self.includes."""
    grammar = Literal('#') + Literal('include') + Literal('<') + Word(alphanums)
    try:
        grammar.parseString(code)
        # Extract the library name between the angle brackets.
        start = code.find("<")
        stop = code.find(">")
        library = code[start + 1:stop]
        if library not in self.includes:
            self.add_error(label="UNNECESSARY_INCLUDE")
    except ParseException:
        # Not an include directive at all — nothing to check.
        return
def check_main_syntax(self, code):
    """Flag MAIN_SYNTAX unless int main's parameters match an accepted form."""
    # Return value for main is optional in C++11
    parser = Literal("int") + Literal("main") + Literal("(") + SkipTo(Literal(")")) + Literal(")")
    if len(parser.searchString(code)):
        main_prefix = Literal("int") + Literal("main") + Literal("(")
        full_use = Literal("int") + "argc" + "," + Optional("const") + \
            "char" + "*" + "argv" + "[" + "]" + ")"
        # 3 options for main() syntax
        accepted = (
            len((main_prefix + Literal(")")).searchString(code))
            or len((main_prefix + Literal("void") + Literal(")")).searchString(code))
            or len((main_prefix + full_use).searchString(code)))
        if not accepted:
            self.add_error(label="MAIN_SYNTAX")
def _getPattern(self):
    """Build the full expression grammar.

    Precedence layers, loosest to tightest: logic (AND/OR) over
    comparisons over arithmetic (^, * /, + -) over operands.
    """
    arith_expr = Forward()
    comp_expr = Forward()
    logic_expr = Forward()
    LPAR, RPAR, SEMI = map(Suppress, "();")
    identifier = Word(alphas + "_", alphanums + "_")
    multop = oneOf('* /')
    plusop = oneOf('+ -')
    expop = Literal("^")
    compop = oneOf('> < >= <= != ==')
    andop = Literal("AND")
    orop = Literal("OR")
    current_value = Literal(".")
    assign = Literal("=")
    # notop = Literal('NOT')
    function = oneOf(' '.join(self.FUNCTIONS))
    function_call = Group(function.setResultsName('fn') + LPAR
                          + Optional(delimitedList(arith_expr)) + RPAR)
    aggregate_column = QuotedString(quoteChar='{', endQuoteChar='}')
    single_column = QuotedString(quoteChar='[', endQuoteChar=']')
    integer = Regex(r"-?\d+")
    real = Regex(r"-?\d+\.\d*")

    # quotedString enables strings without quotes to pass
    operand = \
        function_call.setParseAction(self.__evalFunction) | \
        aggregate_column.setParseAction(self.__evalAggregateColumn) | \
        single_column.setParseAction(self.__evalSingleColumn) | \
        ((real | integer).setParseAction(self.__evalConstant)) | \
        quotedString.setParseAction(self.__evalString).addParseAction(removeQuotes) | \
        current_value.setParseAction(self.__evalCurrentValue) | \
        identifier.setParseAction(self.__evalString)

    arith_expr << operatorPrecedence(operand, [
        (expop, 2, opAssoc.LEFT, self.__expOp),
        (multop, 2, opAssoc.LEFT, self.__multOp),
        (plusop, 2, opAssoc.LEFT, self.__addOp),
    ])
    # comp_expr = Group(arith_expr + compop + arith_expr)
    comp_expr << operatorPrecedence(arith_expr, [
        (compop, 2, opAssoc.LEFT, self.__evalComparisonOp),
    ])
    logic_expr << operatorPrecedence(comp_expr, [
        (andop, 2, opAssoc.LEFT, self.__evalLogicOp),
        (orop, 2, opAssoc.LEFT, self.__evalLogicOp),
    ])
    return logic_expr + StringEnd()
def check_local_include(self, code):
    """Record a locally-included ("...") file not already in self.includes."""
    grammar = Literal('#') + Literal('include') + Literal('"') + Word(alphanums)
    try:
        grammar.parseString(code)
        # Pull the file name out from between the double quotes.
        opening = code.find('"')
        remainder = code[opening + 1:]
        included_file = remainder[:remainder.find('"')]
        if included_file not in self.includes:
            self.local_includes[self.current_file].append(included_file)
    except ParseException:
        # Not a local include directive — nothing to record.
        return
def get_log_formats(config):
    """
    Parse config for log_format directives
    :return: iterator over ('format name', 'format string') tuple of found directives
    """
    # log_format name [params]
    log_format = Literal('log_format') + parameter + \
        Group(OneOrMore(parameter)) + semicolon
    log_format.ignore(pythonStyleComment)
    for match in log_format.searchString(config).asList():
        # match = ['log_format', name, [param, param, ...]]
        yield match[1], ''.join(match[2])
def get_parser():
    """Return a section parser.

    @see grammar.md for the whole grammar."""
    if Section._parser is None:
        # Parser not yet defined. Defining it.
        head_type = Literal("V") | Literal("C") | Literal("@")
        head = (OPEN_HEAD
                + head_type.setResultsName("type")
                + INTEGER.setResultsName("id")
                + CLOSE_HEAD + EOL)
        content_line = WORDS + EOL
        content = OneOrMore(content_line)
        Section._parser = Group(head + content.setResultsName("content"))
    return Section._parser
def __init__(self):
    """Build the shell-variable grammar: $name and ${name[/old/new][idx]}."""
    self.variables = dict()
    _simple_var = Literal("$") + Word(alphanums + "_-").setResultsName("varname")
    # Optional "/orig/new" substitution clause inside ${...}.
    _brace_substitute_part = Optional(
        "/" + (Word(alphanums + "_-").setResultsName("orig"))
        + Optional("/" + Word(alphanums + "_-!?/\\").setResultsName("new")))
    _array_access = "[" + Word(nums + "@*").setResultsName("position") + "]"
    _brace_var = Literal("${") + Word(alphanums + "_-").setResultsName("text") + \
        _brace_substitute_part + Optional(_array_access) + "}"
    # Apply the substitution when a replacement was given.
    _brace_var.setParseAction(
        lambda x: x if not x.new else re.sub(x.orig, x.new, x.text))
    _base_var = _simple_var | _brace_var
    self.var = ('"' + _base_var + '"') | _base_var
    self.var("variable")
def check_non_const_global(self, code):
    """Flag NON_CONST_GLOBAL for global variable definitions above main().

    Fixes vs. original: a duplicated ``variables = variables =``
    assignment is removed, and the regex literals are raw strings
    (the originals used ``\\w``/``\\s`` escapes in non-raw strings,
    which is a DeprecationWarning).
    """
    inside = Literal("int main")
    if len(inside.searchString(code)):
        # From here on we are inside (or past) main.
        self.outside_main = False
    if self.outside_main:
        function = check_if_function(code)
        # "<type> <name[...]> = <value>;" style definition.
        variables = re.compile(r"^(?:\w|_)+\s+(?:\w|_|\[|\])+\s*=\s*.+;")
        keywords = re.compile(r"^\s*(?:using|class|struct)")
        constants = re.compile(r"^\s*(?:static\s+)?const")
        if not function and variables.search(code) and \
                not keywords.search(code) and \
                not constants.search(code):
            self.add_error(label="NON_CONST_GLOBAL")
def getkw_bnf(self):
    """Construct and return the getkw grammar with parse actions attached.

    Fix vs. original: the ``Regex`` patterns were non-raw strings using
    ``\\d`` escapes (DeprecationWarning); they are now raw strings with
    identical matching behavior.
    """
    sect_begin = Literal("{").suppress()
    sect_end = Literal("}").suppress()
    array_begin = Literal("[").suppress()
    array_end = Literal("]").suppress()
    tag_begin = Literal("<").suppress()
    tag_end = Literal(">").suppress()
    eql = Literal("=").suppress()
    dmark = Literal('$').suppress()
    end_data = Literal('$end').suppress()
    prtable = alphanums + r'!$%&*+-./<>?@^_|~'
    ival = Regex(r'[-]?\d+')
    dval = Regex(r'-?\d+\.\d*([eE]?[+-]?\d+)?')
    lval = Regex(r'([Yy]es|[Nn]o|[Tt]rue|[Ff]alse|[Oo]n|[Oo]ff)')

    # Helper definitions
    kstr = quotedString.setParseAction(removeQuotes) ^ \
        dval ^ ival ^ lval ^ Word(prtable)
    name = Word(alphas + "_", alphanums + "_")
    vec = array_begin + delimitedList(
        dval ^ ival ^ lval ^ Word(prtable) ^
        Literal("\n").suppress() ^
        quotedString.setParseAction(removeQuotes)) + array_end
    sect = name + sect_begin
    tag_sect = name + Group(tag_begin + name + tag_end) + sect_begin

    # Grammar
    keyword = name + eql + kstr
    vector = name + eql + vec
    data = Combine(dmark + name) + SkipTo(end_data) + end_data
    section = Forward()
    sect_def = (sect | tag_sect)  # | vec_sect)
    input = section | data | vector | keyword
    section << sect_def + ZeroOrMore(input) + sect_end

    # Parsing actions
    ival.setParseAction(self.conv_ival)
    dval.setParseAction(self.conv_dval)
    lval.setParseAction(self.conv_lval)
    keyword.setParseAction(self.store_key)
    vector.setParseAction(self.store_vector)
    data.setParseAction(self.store_data)
    sect.setParseAction(self.add_sect)
    tag_sect.setParseAction(self.add_sect)
    sect_end.setParseAction(self.pop_sect)

    bnf = ZeroOrMore(input) + StringEnd().setFailAction(parse_error)
    bnf.ignore(pythonStyleComment)
    return bnf
def fromString(inputText, verbose=False):
    """Parse IDSL source text and return the parsed module structure.

    :param inputText: IDSL source text
    :param verbose: when True, print progress information

    Fix vs. original: the Python 2 ``print`` statement (a SyntaxError on
    Python 3) is replaced with the ``print()`` function.
    """
    if verbose:
        print('Verbose:', verbose)
    # Strip /* ... */ comments before parsing.
    text = nestedExpr("/*", "*/").suppress().transformString(inputText)

    semicolon = Suppress(Word(";"))
    quote = Suppress(Word("\""))
    op = Suppress(Word("{"))
    cl = Suppress(Word("}"))
    opp = Suppress(Word("("))
    clp = Suppress(Word(")"))
    lt = Suppress(Word("<"))
    gt = Suppress(Word(">"))

    identifier = Word(alphas + "_", alphanums + "_")
    typeIdentifier = Word(alphas + "_", alphanums + "_:")

    ## Imports
    idslImport = Suppress(Word("import")) + quote + \
        CharsNotIn("\";").setResultsName('path') + quote + semicolon
    idslImports = ZeroOrMore(idslImport)

    dictionaryDef = Word("dictionary") + lt + CharsNotIn("<>;") + gt + \
        identifier.setResultsName('name') + semicolon
    sequenceDef = Word("sequence") + lt + CharsNotIn("<>;") + gt + \
        identifier.setResultsName('name') + semicolon
    enumDef = Word("enum") + identifier.setResultsName('name') + op + \
        CharsNotIn("{}") + cl + semicolon
    structDef = Word("struct") + identifier.setResultsName('name') + op + \
        CharsNotIn("{}") + cl + semicolon
    exceptionDef = Word("exception") + identifier.setResultsName('name') + op + \
        CharsNotIn("{}") + cl + semicolon

    raiseDef = Suppress(Word("throws")) + typeIdentifier + \
        ZeroOrMore(Literal(',') + typeIdentifier)
    decoratorDef = Literal('idempotent') | Literal('out')
    retValDef = typeIdentifier.setResultsName('ret')

    firstParam = Group(Optional(decoratorDef.setResultsName('decorator'))
                       + typeIdentifier.setResultsName('type')
                       + identifier.setResultsName('name'))
    nextParam = Suppress(Word(',')) + firstParam
    params = firstParam + ZeroOrMore(nextParam)

    remoteMethodDef = Group(Optional(decoratorDef) + retValDef
                            + typeIdentifier.setResultsName('name')
                            + opp + Optional(params).setResultsName('params') + clp
                            + Optional(raiseDef) + semicolon)
    interfaceDef = Word("interface") + typeIdentifier.setResultsName('name') + \
        op + Group(ZeroOrMore(remoteMethodDef)) + cl + semicolon

    moduleContent = Group(structDef | enumDef | exceptionDef | dictionaryDef
                          | sequenceDef | interfaceDef)
    module = Suppress(Word("module")) + identifier.setResultsName("name") + op + \
        ZeroOrMore(moduleContent).setResultsName("contents") + cl + semicolon

    IDSL = idslImports.setResultsName("imports") + module.setResultsName("module")
    IDSL.ignore(cppStyleComment)
    tree = IDSL.parseString(text)
    return IDSLParsing.module(tree)
def typeSwitch(line):
    """Set the global ``typedversion`` flag from an "#option = typed|untyped" line.

    Fixes vs. original:
    - ``oneOf("untyped", "typed")`` passed "typed" as oneOf's second
      positional parameter (the *caseless* flag), so "typed" was never an
      alternative; it is now ``oneOf("untyped typed")``.
    - The typed branch assigned a misspelled ``typeversion`` name, so the
      global was never set to True.
    - The bare ``raise ParseException`` lacked the constructor's required
      arguments.
    - Python 2 print statements are now ``print()`` calls, and the local
      no longer shadows the builtin ``str``.
    """
    global typedversion
    typeflag = Literal("#") + "option" + Literal("=") + oneOf("untyped typed")
    res = typeflag.parseString(line)
    if res[3] == "untyped":
        typedversion = False
    elif res[3] == "typed":
        typedversion = True
    else:
        print("Cannot determine whether typed or untyped.")
        raise ParseException(line, 0, "Cannot determine whether typed or untyped.")
    msg = "Detected "
    if not typedversion:
        msg += "un"
    msg += "typed version."
    print(msg)
def __init__(self, logger):
    """Set up salt/etcd clients and the formation-path parser."""
    self.salt_client = salt.client.LocalClient()
    self.etcd = Etcd(logger)
    self.logger = logger
    # Parse out the username and formation name
    # from the ETCD directory string
    name_chars = srange("[0-9a-zA-Z_-]")
    self.formation_parser = (
        Literal('/formations/')
        + Word(name_chars).setResultsName('username')
        + Literal('/')
        + Word(name_chars).setResultsName('formation_name'))
def __init__(self):
    """Build the nginx-config grammar; the result is stored in self.script."""
    left_bracket = Literal("{").suppress()
    right_bracket = Literal("}").suppress()
    semicolon = Literal(";").suppress()
    space = White().suppress()
    key = Word(alphanums + "+.-_/")
    value = ZeroOrMore(
        CharsNotIn('{};#"\'')
        | space
        | QuotedString("'", escChar='\\', multiline=True)
        | QuotedString('"', escChar='\\', multiline=True))
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")
    comment = Literal('#').suppress() + Optional(restOfLine)

    # rules
    assignment = Group(
        (key | value) + value + semicolon + Optional(space + comment))
    block = Forward()
    block << Group(
        Group(key + Optional(space + modifier) + Optional(space)
              + Optional(value) + Optional(space + value))
        + left_bracket
        + Group(ZeroOrMore(assignment | block | comment.suppress()))
        + right_bracket)

    def comment_handler(t):
        # Keep only "promo" markers and author e-mail addresses.
        result = []
        if "promo" in t[0]:
            result.append("promo")
        if "author: " in t[0]:
            try:
                email = t[0].split("author: ")[1].strip()
                result.append(email)
            except Exception:
                result.append(t[0])
        return result

    comment.setParseAction(comment_handler)
    self.script = OneOrMore(assignment | block | comment.suppress())
def getkw_bnf(self):
    """Construct and return the getkw grammar with parse actions attached."""
    sect_begin = Literal("{").suppress()
    sect_end = Literal("}").suppress()
    array_begin = Literal("[").suppress()
    array_end = Literal("]").suppress()
    arg_begin = Literal("(").suppress()
    arg_end = Literal(")").suppress()
    eql = Literal("=").suppress()
    dmark = Literal('$').suppress()
    end_data = Literal('$end').suppress()
    prtable = alphanums + r'!$%&*+-./<>?@^_|~'

    # Helper definitions
    kstr = Word(prtable) ^ quotedString.setParseAction(removeQuotes)
    name = Word(alphas + "_", alphanums + "_")
    vec = array_begin + delimitedList(
        Word(prtable) ^
        Literal("\n").suppress() ^
        quotedString.setParseAction(removeQuotes)) + array_end
    sect = name + sect_begin
    key_sect = name + Group(arg_begin + kstr + arg_end) + sect_begin
    vec_sect = name + Group(arg_begin + vec + arg_end) + sect_begin

    # Grammar
    keyword = name + eql + kstr
    vector = name + eql + vec
    data = Combine(dmark + name) + SkipTo(end_data) + end_data
    section = Forward()
    sect_def = (sect | key_sect | vec_sect)
    input = section | data | vector | keyword
    section << sect_def + ZeroOrMore(input) + sect_end

    # Parsing actions
    keyword.setParseAction(self.store_key)
    vector.setParseAction(self.store_vector)
    data.setParseAction(self.store_data)
    sect.setParseAction(self.add_sect)
    key_sect.setParseAction(self.add_sect)
    vec_sect.setParseAction(self.add_vecsect)
    sect_end.setParseAction(self.pop_sect)

    bnf = ZeroOrMore(input) + StringEnd().setFailAction(parse_error)
    bnf.ignore(pythonStyleComment)
    return bnf
def get_access_logs(config):
    """
    Parse config for access_log directives
    :return: iterator over ('path', 'format name') tuple of found directives
    """
    access_log = Literal("access_log") + ZeroOrMore(parameter) + semicolon
    access_log.ignore(pythonStyleComment)
    for directive in access_log.searchString(config).asList():
        path = directive[1]
        if path == 'off' or path.startswith('syslog:'):
            # nothing to process here
            continue
        # The format name follows the path unless it is a key=value option.
        if len(directive) > 2 and '=' not in directive[2]:
            format_name = directive[2]
        else:
            format_name = 'combined'
        yield path, format_name
def _get_handbrake_title_pattern(self):
    """Grammar for one HandBrake scan title: number, duration, subtitles."""
    title = Literal("+ title").suppress()
    integer = Word("0123456789")
    time = Combine(integer + ":" + integer + ":" + integer)
    duration = Literal("+ duration:").suppress()
    subtitle = Literal("+ subtitle tracks:")
    iso = Literal('(iso639-2:').suppress() + Word(alphas)
    subtitle_track = Literal("+").suppress() + Group(
        integer + SkipTo(iso).suppress() + iso) + restOfLine.suppress()
    title_num = integer.setResultsName("title")
    duration_num = time.setResultsName("duration")
    subtitles = Group(ZeroOrMore(subtitle_track)).setResultsName("subtitles")
    return (title + title_num
            + SkipTo(duration).suppress() + duration + duration_num
            + SkipTo(subtitle).suppress() + subtitle.suppress() + subtitles)
def grammar():
    """SQL-dump grammar: CREATE TABLE, ALTER..FOREIGN KEY, comments, other."""
    # Balanced parentheses with arbitrary non-paren content.
    parenthesis = Forward()
    parenthesis <<= "(" + ZeroOrMore(CharsNotIn("()") | parenthesis) + ")"

    field_def = OneOrMore(Word(alphanums + "_\"'`:-") | parenthesis)
    field_def.setParseAction(field_act)

    tablename_def = (Word(alphas + "`_") | QuotedString("\""))

    field_list_def = field_def + ZeroOrMore(Suppress(",") + field_def)
    field_list_def.setParseAction(field_list_act)

    create_table_def = Literal("CREATE") + "TABLE" + \
        tablename_def.setResultsName("tableName") + \
        "(" + field_list_def.setResultsName("fields") + ")" + ";"
    create_table_def.setParseAction(create_table_act)

    add_fkey_def = Literal("ALTER") + "TABLE" + "ONLY" + \
        tablename_def.setResultsName("tableName") + "ADD" + "CONSTRAINT" + \
        Word(alphanums + "_") + "FOREIGN" + "KEY" + \
        "(" + Word(alphanums + "_").setResultsName("keyName") + ")" + \
        "REFERENCES" + Word(alphanums + "_").setResultsName("fkTable") + \
        "(" + Word(alphanums + "_").setResultsName("fkCol") + ")" + ";"
    add_fkey_def.setParseAction(add_fkey_act)

    other_statement_def = OneOrMore(CharsNotIn(";")) + ";"
    other_statement_def.setParseAction(other_statement_act)

    comment_def = "--" + ZeroOrMore(CharsNotIn("\n"))
    comment_def.setParseAction(other_statement_act)

    return OneOrMore(comment_def | create_table_def
                     | add_fkey_def | other_statement_def)
def remove_comments(string):
    """Remove comments from the statements

    Args:
        string(str): String to be processed

    Returns:
        result(str): String with comments trimmed
    """
    if string == '':
        return string
    # Strip /* ... */ multiline comments.
    string = nestedExpr('/*', '*/').suppress().transformString(string)
    # Strip -- single-line comments.
    single_line = Literal('--') + ZeroOrMore(CharsNotIn('\n'))
    return single_line.suppress().transformString(string)
def _get_bus_array_construct(self):
    """ Returns a construct for an array of bus data. """
    bus_no = integer.setResultsName("bus_no")
    v_base = real.setResultsName("v_base")  # kV
    v_magnitude = Optional(real).setResultsName("v_magnitude")
    v_angle = Optional(real).setResultsName("v_angle")  # radians
    area = Optional(integer).setResultsName("area")  # not used yet
    region = Optional(integer).setResultsName("region")  # not used yet

    bus_data = (bus_no + v_base + v_magnitude + v_angle
                + area + region + scolon)
    bus_data.setParseAction(self.push_bus)

    bus_array = Literal("Bus.con") + "=" + "[" + "..." + \
        ZeroOrMore(bus_data + Optional("]" + scolon))
    # Sort buses according to their name (bus_no)
    bus_array.setParseAction(self.sort_buses)
    return bus_array
def __init__(self, max_=60):
    """Build the cron-field grammar; max_ bounds the '*' expansion."""
    # define the grammar structure
    digits = "0123456789"
    star = Literal('*')
    number = Word(digits) | Word(alphas)
    steps = number
    range_ = number + Optional(Literal('-') + number)
    numspec = star | range_
    expr = Group(numspec) + Optional(Literal('/') + steps)
    extra_groups = ZeroOrMore(Literal(',') + expr)
    groups = expr + extra_groups + StringEnd()
    # define parse actions
    star.setParseAction(self._expand_star)
    number.setParseAction(self._expand_number)
    range_.setParseAction(self._expand_range)
    expr.setParseAction(self._filter_steps)
    extra_groups.setParseAction(self._ignore_comma)
    groups.setParseAction(self._join_to_set)
    self.max_ = max_
    self.parser = groups
#define MAX_LOCS=100 #define USERNAME = "******" #define PASSWORD = "******" a = MAX_LOCS; CORBA::initORB("xyzzy", USERNAME, PASSWORD ); """ ################# print("Example of an extractor") print("----------------------") # simple grammar to match #define's ident = Word(alphas, alphanums + "_") macroDef = Literal("#define") + ident.setResultsName( "name") + "=" + restOfLine.setResultsName("value") for t, s, e in macroDef.scanString(testData): print(t.name, ":", t.value) # or a quick way to make a dictionary of the names and values # (return only key and value tokens, and construct dict from key-value pairs) # - empty ahead of restOfLine advances past leading whitespace, does implicit lstrip during parsing macroDef = Suppress("#define") + ident + Suppress("=") + empty + restOfLine macros = dict(list(macroDef.searchString(testData))) print("macros =", macros) print() ################# print("Examples of a transformer") print("----------------------")
tmpfile2.close() ts = ts_from_file(tmpfile2.name) return ts # return params # definition of TS grammar ParserElement.setDefaultWhitespaceChars(" \t") id = Word(alphanums+"_\"'.:-") #place = Literal("p") + Word(nums) number = Word(nums).setParseAction(lambda tokens: int(tokens[0])) newlines = Suppress(OneOrMore(LineEnd())) modelName = ".model" + id("modelName") + newlines signalNames = ZeroOrMore(Suppress(oneOf(".inputs .outputs")) + OneOrMore( id ) + newlines)("signals") dummyNames = Optional(Suppress(".dummy") + OneOrMore( id ) + newlines, default=[])("dummies") arc = id + id + id + newlines graph = Literal(".state graph") + newlines + OneOrMore(Group(arc))("arcs") frequency_list = ZeroOrMore(Group(id+number)+newlines) frequency = ".frequencies" + Suppress(OneOrMore(LineEnd())) + frequency_list("frequencies") marking_list = ZeroOrMore(id) marking = ".marking" + Suppress("{") + marking_list("marking") + Suppress("}") + newlines ts_grammar = Optional(newlines) + Optional(modelName) + signalNames + dummyNames + graph + marking + Optional(frequency) + ".end" ts_grammar.ignore(pythonStyleComment) def ts_from_sis(file_or_filename): """Loads a TS (possibly extended with state frequencies) in SIS format.""" if isinstance(file_or_filename, basestring): #a filename filename = file_or_filename else: # a file object try: filename = file_or_filename.filename except AttributeError:
def check_main_syntax(self, code):
    """Flag MAIN_SYNTAX unless int main's parameters match an accepted form."""
    # Return value for main is optional in C++11
    parser = Literal("int") + Literal("main") + Literal("(") + \
        SkipTo(Literal(")")) + Literal(")")
    if len(parser.searchString(code)):
        main_prefix = Literal("int") + Literal("main") + Literal("(")
        full_use = Literal("int") + "argc" + "," + Optional("const") + \
            "char" + "*" + "argv" + "[" + "]" + ")"
        # 3 options for main() syntax
        forms = (main_prefix + Literal(")"),
                 main_prefix + Literal("void") + Literal(")"),
                 main_prefix + full_use)
        if not any(len(form.searchString(code)) for form in forms):
            self.add_error(label="MAIN_SYNTAX")
from pyparsing import Suppress, Literal, Word, ZeroOrMore, OneOrMore, \
    Optional, stringEnd, alphas, Forward, Empty, Group, \
    quotedString

from regl import conf
from regl.aux import chop

indentTok, dedentTok, lineEndTok, hspaceTok, superTok, parTok = \
    map(Suppress, [conf.indentTok, conf.dedentTok, conf.lineEndTok,
                   conf.hspaceTok, conf.superTok, conf.parTok])

# "<name>" sequences map back to real characters via conf.charMapI.
escapedChar = (Literal("<") + Word(alphas + "-") + Literal(">"))\
    .setParseAction(lambda t: conf.charMapI[''.join(t)])


def wordParseAction(t):
    """Join the matched pieces and substitute escaped character sequences.

    Fix vs. original: an unreachable second ``return word`` statement
    after the first return has been removed.
    """
    word = u''.join(t)
    return escapedChar.transformString(word)


def specWord(chrs):
    """Return a Word over *chrs* with escaped-character substitution."""
    return Word(chrs).setParseAction(wordParseAction)


def specWords(chrs):
    """Return one-or-more specWords, joined with single spaces."""
    return OneOrMore(specWord(chrs))\
        .setParseAction(lambda t: u' '.join(t))
# Verb Inflections and Clitics
vbz_pre_inflec = ['ان', 'ن', 'ت', 'ي']
key_vbz_prefixes = ['ي', 'ت', 'ن']
vbz_suff_inflec = ['وا', 'و']
vbd_suff_inflec = ["ت", "نا", "توا", "و", "وا"]
key_vbd_suffixes = ["ت", "نا", "و", "وا"]
dir_obj_suffixes = ["ني", "نا", "ك", "كم", "و", "ه", "هو", "ها", "هم"]
ind_obj_suffixes = ["ي", "نا", "كم", "و", "ه", "هو", "ها", "هم", "ك"]
pre_neg = ['م', 'ما']
post_neg = ['ش', 'شي']

# Pyparsing tokens for the suffix/clitic combinations.
VBZ_SUFF = oneOf(vbz_suff_inflec)
VBD_SUFF = oneOf(vbd_suff_inflec)
VB_DO = oneOf(dir_obj_suffixes)
VB_IDO = Literal("ل") + oneOf(ind_obj_suffixes)
VBD_CLIT = Or([VB_DO, Optional(VB_DO) + VB_IDO])
VBZ_CLIT = Or([
    VBZ_SUFF,
    VB_DO,
    VB_IDO,
    (VBZ_SUFF + VB_DO),
    (VBZ_SUFF + VB_IDO),
    (VBZ_SUFF + VB_DO + VB_IDO),
    (VB_DO + VB_IDO)
])
NEG_VBZ_CLIT = Or([oneOf(post_neg), VBZ_CLIT + oneOf(post_neg)])
NEG_VBD_CLIT = Or([oneOf(post_neg), VBD_CLIT + oneOf(post_neg)])

# Alternate way to make verb stems
# from pyparsing import srange, Word
# arabicChars = srange(r"[\0x0621-\0x0652,\0x067E,\0x06A4,\0x06A8]")
# verbStem = Word(arabicChars, minimum=2).setName('stem')
# vbz_pre = oneOf(['ان', 'ن', 'ت', 'ي']).setName('pre')
# vbz_suff = oneOf(['وا', 'و']).setName('suff')
def convertToFloat(s, loc, toks):
    """Parse action converting the matched text to ``float``.

    :param s: the original string being parsed
    :param loc: location of the match within *s*
    :param toks: matched tokens; ``toks[0]`` holds the float text
    :raises ParseException: if the text is not a valid float
    """
    try:
        return float(toks[0])
    except (TypeError, ValueError) as e:
        # BUG FIX: ParseException's signature is (pstr, loc, msg); the
        # original passed ``loc`` as the source string, garbling the error
        # report.  Also narrowed from BaseException, which would have
        # swallowed KeyboardInterrupt/SystemExit.
        raise ParseException(s, loc, "invalid float format %s" % toks[0]) from e


exponent = CaselessLiteral("e") + Optional(sign) + Word(nums)

#note that almost all these fields are optional,
#and this can match almost anything. We rely on Pythons built-in
#float() function to clear out invalid values - loosely matching like this
#speeds up parsing quite a lot
floatingPointConstant = Combine(
    Optional(sign) +
    Optional(Word(nums)) +
    Optional(Literal(".") + Optional(Word(nums))) +
    Optional(exponent)
)
floatingPointConstant.setParseAction(convertToFloat)

number = floatingPointConstant

#same as FP constant but don't allow a - sign
nonnegativeNumber = Combine(
    Optional(Word(nums)) +
    Optional(Literal(".") + Optional(Word(nums))) +
    Optional(exponent)
)
nonnegativeNumber.setParseAction(convertToFloat)
# Create keywords constant expressions SELECT, FROM, WHERE, AND, OR, IN, IS, NOT, NULL, INSERT, INTO, VALUES, DELETE, CREATE, TABLE, IF, EXISTS, CHAR, VARCHAR, INT, FLOAT, DATE, DATETIME, TIME, YEAR, DATABASE = map( CaselessKeyword, "select from where and or in is not null insert into values delete create table if exists char varchar int float date datetime time year database" .split()) KEYWORDS = SELECT ^ FROM ^ WHERE ^ AND ^ OR ^ IN ^ IS ^ NOT ^ NULL ^ INSERT ^ INTO ^ VALUES ^ DELETE ^ CREATE ^ TABLE ^ IF ^ EXISTS ^ CHAR ^ VARCHAR ^ INT ^ FLOAT ^ DATE ^ DATETIME ^ TIME ^ YEAR ^ DATABASE # if not exists keyword IF_NOT_EXISTS = IF + NOT + EXISTS # not null keyword NOT_NULL = NOT + NULL # Token for an identifier ident = ~KEYWORDS + ~Literal(' ') + Word( alphas, alphanums + "_$").setName("identifier") # Token for a column name # a columName is formed by identifier separated by a dot and combined together columnName = delimitedList(ident, ".", combine=True).setName("column name") columnName.addParseAction(ppc.upcaseTokens) # Token for a sublist of column name columnNameList = Group(delimitedList(columnName)) # Token for a table name tableName = delimitedList(ident, ".", combine=True).setName("table name") tableName.addParseAction(ppc.upcaseTokens) # Token for a sublist of table name tableNameList = Group(delimitedList(tableName)) # list of token literal to identify binary operator binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True)
    ParseResults,
    Regex,
    restOfLine,
    ZeroOrMore,
)


def parse_file(filename: str) -> ParseResults:
    """Parse a file using the grammar."""
    return bnf.parseFile(filename)


# This makes sure nothing is whitespace
ParserElement.setDefaultWhitespaceChars("")

newline = Literal("\n")
# "//" comments run to the end of the line.
comment_line = Literal("//") + restOfLine + newline
# Any line that does not open a "[...]" section header counts as content.
content_line = Regex("[^[].*") + newline.suppress()

"""
Multiple choice question
"""
mc_question_header = CaselessLiteral("[multiple_choice]") + newline.suppress()
mc_question_section = mc_question_header + OneOrMore(content_line)

mc_answer_header = CaselessLiteral("[answer]") + newline.suppress()
mc_answer_section = mc_answer_header + OneOrMore(content_line)

mc_feedback_header = CaselessLiteral("[feedback]") + newline.suppress()
def _pushFirst(str, loc, toks):
    # Parse action: push the first matched token onto the evaluation stack.
    if debug_flag:
        print("pushing ", toks[0], "str is ", str)
    exprStack.append(toks[0])


def _assignVar(str, loc, toks):
    # Parse action: remember the assignment target's name.
    global targetvar
    targetvar = toks[0]


# -----------------------------------------------------------------------------
# The following statements define the grammar for the parser.

point = Literal(".")
e = CaselessLiteral("E")
plusorminus = Literal("+") | Literal("-")
number = Word(nums)
# Signed integer, then a float with optional fraction and exponent.
integer = Combine(Optional(plusorminus) + number)
floatnumber = Combine(integer + Optional(point + Optional(number)) + Optional(e + integer))

lbracket = Literal("[")
rbracket = Literal("]")
ident = Forward()
## The definition below treats array accesses as identifiers. This means your expressions
## can include references to array elements, rows and columns, e.g., a = b[i] + 5.
## Expressions within []'s are not presently supported, so a = b[i+1] will raise
## a ParseException.
ident = Combine(
def get_vrml_format():
    """Build and return the top-level pyparsing grammar for VRML files.

    NOTE(review): relies on parsers defined earlier in this file (pword,
    pfloat, name_str, xyz, dict_open, dict_close, shape, geometry,
    world_info, background, navigation_info, directional_light,
    pword_num_underscore) — not visible in this chunk.  The inline
    ``parseString`` calls are smoke tests run at construction time.
    """
    #sample = """
    ##VRML V2.0 utf8
    #DirectionalLight {
    #direction 0.577 -0.577 -0.577
    #color 1.000 1.000 1.000
    #intensity 0.450
    #ambientIntensity 1.0
    #}
    #DirectionalLight {
    #direction -0.577 -0.577 -0.577
    #color 1.000 1.000 1.000
    #intensity 0.680
    #ambientIntensity 1.0
    #}
    #"""
    # 1.0
    # -1.0
    # -1.
    #pminus1 = Word('-1')
    list_open = Literal('[').suppress()
    list_close = Literal(']').suppress()

    # "name <float>", "name <f> <f> <f>", "name <f> <f> <f> <f>"
    name_float = pword + pfloat
    name_float3 = pword + Group(pfloat * 3)
    name_float4 = pword + Group(pfloat * 4)
    #pfloat_pos = Combine(Optional('+') + pint + '.' + Optional(pint))
    #pfloat_neg = Combine(Optional('-') + pint + '.' + Optional(pint))
    #pfloat_pos2 = Combine(Optional('+') + Optional(pint) + '.' + pint)
    #pfloat_neg2 = Combine(Optional('-') + Optional(pint) + '.' + pint)
    #pfloat = pfloat_neg | pfloat_neg2 | pfloat_pos | pfloat_pos2
    #name_name_float_list = pword + pword + float_list
    #pword_triple = pword + pfloat * 3
    #pword_float = pword + pfloat
    #print(pword_float.parseString('1.'))
    # color 1.000 1.000 1.000
    # direction -0.577 -0.577 -0.577
    # out = pword_triple.parseString('color 1.000 1.000 1.000', parseAll=False)  # works
    unused_out = name_float3.parseString('direction -0.577 -0.577 -0.577', parseAll=False)  # works
    # ambientIntensity 1.0
    unused_out = name_float.parseString('ambientIntensity 1.0', parseAll=False)
    #print(out, dir(type(out)))  # ParsingResults
    #print(out.asDict())
    #print(out.asList())
    #----------------------------------------
    #simple_shape = """
    #Shape {
    #appearance Appearance{
    #texture DEF PICBAND PixelTexture {
    #image 1 10 4 0xFFFFFF77 0xFF0000FF 0xFFCC0077 0xFFFF00FF
    #0x77FF00FF 0x00FF00FF 0x00FFFFFF 0x0000FFFF
    #0x7700FF77 0x444444FF
    #}
    #}
    #geometry Sphere{}
    #}
    #"""
    #hexi = Word("047xF")
    #simple_shape = """
    #appearance Appearance{
    #texture DEF PICBAND PixelTexture {
    #image 1 10 4 0xFFFFFF77 0xFF0000FF 0xFFCC0077 0xFFFF00FF
    #0x77FF00FF 0x00FF00FF 0x00FFFFFF 0x0000FFFF
    #0x7700FF77 0x444444FF
    #}
    #}
    #"""
    # "[ x y z, x y z, ... ]" coordinate list
    xyz_vector = list_open + OneOrMore(Group(xyz)) + list_close
    name_xyz_vector = pword + xyz_vector
    # A single "key value(s)" entry inside a node's braces.
    data_value = name_float3 | name_str | name_float | name_xyz_vector
    data_values = OneOrMore(data_value)
    #name_dict = pword + Group(dict_open + data_value + dict_close)
    # "NodeName { key value ... }"
    name_dict = pword + Group(dict_open.suppress() + data_values + dict_close.suppress())
    name_float3.parseString('skyColor 0.1 0.3 1')
    name_dict.parseString("""
    Background {
    skyColor 0.1 0.3 1
    }
    """)
    name_str.parseString(''' title "Texture-mapped pyramid" ''')
    name_dict.parseString("""
    WorldInfo {
    title "Texture-mapped pyramid"
    }
    """)
    name_dict.parseString("""
    WorldInfo {
    title "Texture-mapped pyramid"
    info "Gravity: on"
    }
    """)
    name_dict.parseString("""
    DirectionalLight {
    direction 0.577 -0.577 -0.577
    color 1.000 1.000 1.000
    intensity 0.450
    ambientIntensity 1.0
    }
    """)
    xyz.parseString(""" 0 0 -1, """)
    xyz_vector.parseString("""
    [
    0 0 -1, 0 0 -1, 0 0 -1,
    0 0 -1, 0 0 -1, 0 0 -1,
    ]
    """)
    name_xyz_vector.parseString("""
    vector [
    0 0 -1, 0 0 -1, 0 0 -1,
    0 0 -1, 0 0 -1, 0 0 -1,
    ]
    """)
    #print(names_dict.parseString("""
    #normal Normal {
    #vector [
    #0 0 -1, 0 0 -1, 0 0 -1,
    #0 0 -1, 0 0 -1, 0 0 -1,
    #]
    #}
    #"""))
    shape.parseString("""
    Shape {
    appearance Appearance{
    texture DEF PICBAND PixelTexture {
    image 1 10 4 0xFFFFFF77 0xFF0000FF 0xFFCC0077 0xFFFF00FF
    0x77FF00FF 0x00FF00FF 0x00FFFFFF 0x0000FFFF
    0x7700FF77 0x444444FF
    }
    }
    geometry Sphere{}
    }
    """)
    #txt = read_vrml('pyramid_sphere.wrl')
    #----------------------------------------
    txt = """
    WorldInfo {
    title "Texture-mapped pyramid"
    info "Gravity: on"
    }
    Background {
    skyColor 0.1 0.3 1
    }
    NavigationInfo {
    type "EXAMINE"
    headlight TRUE
    }
    Shape {
    appearance Appearance{
    texture DEF PICBAND PixelTexture {
    image 1 10 4 0xFFFFFF77 0xFF0000FF 0xFFCC0077 0xFFFF00FF
    0x77FF00FF 0x00FF00FF 0x00FFFFFF 0x0000FFFF
    0x7700FF77 0x444444FF
    }
    }
    geometry Sphere{}
    }
    """
    # Transform node: children list, translation, or rotation entries.
    child = OneOrMore(shape)
    children = Literal('children') + list_open + Group(child) + list_close
    translation = Literal('translation') + name_float3
    rotation = Literal('rotation') + name_float4
    transform_values = children | translation | rotation
    transform1 = Literal(
        'Transform') + dict_open + transform_values + dict_close
    # "DEF <name> Transform {...}" named variant
    transform2 = Literal('DEF') + pword_num_underscore + transform1
    transform = transform1 | transform2

    geometry_str = """
    geometry IndexedFaceSet {
    coord Coordinate{
    point[
    -2 -2 0, 2 -2 0, 2 2 0, -2 2 0, 0 0 5,
    ]
    }
    coordIndex [
    0, 1, 4, -1,
    1, 2, 4, -1,
    2, 3, 4, -1,
    3, 0, 4, -1,
    3, 2, 1, 0, -1,
    ]
    texCoord TextureCoordinate {
    point [
    0 0, 0 .3, 0 .5, 0 .7, 0 1,
    ]
    }
    }
    """
    #print('geometry...')
    geometry.parseString(geometry_str)
    shape_str = """
    Shape{
    appearance Appearance{
    texture DEF PICBAND ImageTexture {
    url "http://www.rt.cs.boeing.com/people/davidk/wrl/geo/colors.jpg"
    repeatS FALSE
    repeatT FALSE
    }
    }
    geometry IndexedFaceSet {
    coord Coordinate{
    point[
    -2 -2 0, 2 -2 0, 2 2 0, -2 2 0, 0 0 5,
    ]
    }
    coordIndex [
    0, 1, 4, -1,
    1, 2, 4, -1,
    2, 3, 4, -1,
    3, 0, 4, -1,
    3, 2, 1, 0, -1,
    ]
    texCoord TextureCoordinate {
    point [
    0 0, 0 .3, 0 .5, 0 .7, 0 1,
    ]
    }
    }
    }
    """
    transform_str = """
    Transform{
    children[
    Shape{
    appearance Appearance{
    texture DEF PICBAND ImageTexture {
    url "http://www.rt.cs.boeing.com/people/davidk/wrl/geo/colors.jpg"
    repeatS FALSE
    repeatT FALSE
    }
    }
    geometry IndexedFaceSet {
    coord Coordinate{
    point[
    -2 -2 0, 2 -2 0, 2 2 0, -2 2 0, 0 0 5,
    ]
    }
    coordIndex [
    0, 1, 4, -1,
    1, 2, 4, -1,
    2, 3, 4, -1,
    3, 0, 4, -1,
    3, 2, 1, 0, -1,
    ]
    texCoord TextureCoordinate {
    point [
    0 0, 0 .3, 0 .5, 0 .7, 0 1,
    ]
    }
    }
    }
    ]
    }
    """
    #print(txt)
    # Top level: any sequence of known node types.
    vrml_format = OneOrMore(
        Group(directional_light) | world_info | background |
        navigation_info | shape | transform)
    vrml_format.parseString(txt)
    #print('shape...')
    shape.parseString(shape_str)
    #print('transform...')
    transform.parseString(transform_str)

    if 0:
        # Disabled benchmark path.
        print('ready for gbu')
        #txt = read_vrml('gbu.wrl')
        # t_no_float_regex = 63 sec
        # t_float_regex = 31 sec
        t0 = time.time()
        vrml_format.parseString(txt, parseAll=True)
        print(time.time() - t0)
        #for datai in data:
        #print('  ', datai)
    #print('done!!!!!!!!')
    #import json
    #with open('gbu.json', 'w') as fp:
    #json.dump(data, fp, indent=4)
    return vrml_format
import logging

from pyparsing import alphanums, delimitedList, Group, Literal, oneOf, \
    OneOrMore, Optional, Suppress, Word, ZeroOrMore

logger = logging.getLogger('grafanizer.query_dsl')

# Punctuation, suppressed so it never shows up in parse results.
lp = Suppress('(')
rp = Suppress(')')
dot = Suppress('.')
colon = Suppress(':')

# Section keywords of the query DSL.
e_lit = Literal('entity')
c_lit = Literal('check')
m_lit = Literal('metric')
type_lit = Literal('type')

# Argument text handed to a matcher function.
match_word = Word(alphanums + '!_-/\// ')

# Attributes each section may filter on.
entity_attrs = oneOf('id label')
check_attrs = oneOf('id label')
metric_attrs = oneOf('name')

# Supported string-matching functions, e.g. label:contains(foo).
func = oneOf('startswith endswith contains full regex')

# metric(name:func(arg), ...) — one clause per attribute filter.
m_clause = Group(metric_attrs + colon + func + lp + match_word + rp)
m_exp = Suppress(m_lit) + lp + delimitedList(m_clause) + rp
m_section = OneOrMore(dot + m_exp)

# check(...) clauses follow the same shape.
c_clause = Group(check_attrs + colon + func + lp + match_word + rp)
c_exp = Suppress(c_lit) + lp + delimitedList(c_clause) + rp
# PyParsing from pyparsing import Or, Word, Literal, nums, alphanums, alphas, restOfLine # Zato from zato.common.haproxy import http_log, Config logger = logging.getLogger(__name__) # It's something Zato can understood and treat accordingly if such a token is # found on any HAProxy configuration file's line. zato_item_token = "# ZATO " # PyParsing grammar for config values. uri = Word(alphanums + punctuation) backend_server = Literal("server").suppress() + Word(alphanums + ".-_") + \ Word(alphanums + ".-_") + Literal(":").suppress() + \ Word(nums) + restOfLine simple_option = Literal("option").suppress() + Word(alphas) frontend_bind = Literal("bind").suppress() + Or( "*" | Word(alphanums + ".-_")) + Literal(":").suppress() + Word(nums) maxconn = Literal("maxconn").suppress() + Word(nums) timeout = Literal("timeout").suppress() + Word(alphas).suppress() + Word(nums) global_log = Literal("log").suppress() + Word(alphanums + ".-_") + Literal(":").suppress() + \ Word(nums) + Word(alphanums) + Word(alphanums) option_httpchk = Literal("option httpchk").suppress() + Word(alphas) + uri monitor_uri = Literal("monitor-uri").suppress() + uri stats_uri = Literal("stats uri").suppress() + uri stats_socket = Literal("stats socket").suppress() + uri
# Single-quoted string literal; the (?!') lookahead keeps it from matching
# the opening of a long (''') quote form.
STRING_LITERAL1 = Regex(
    u"'(?:[^'\\n\\r\\\\]|\\\\['ntbrf\\\\])*'(?!')", flags=re.U)
STRING_LITERAL1.setParseAction(
    lambda x: rdflib.Literal(decodeUnicodeEscape(x[0][1:-1])))

# [157] STRING_LITERAL2 ::= '"' ( ([^#x22#x5C#xA#xD]) | ECHAR )* '"'
# STRING_LITERAL2 = Literal('"') + ZeroOrMore (
#     Regex(u'[^\u0022\u005C\u000A\u000D]',flags=re.U) | ECHAR ) + '"'
STRING_LITERAL2 = Regex(
    u'"(?:[^"\\n\\r\\\\]|\\\\["ntbrf\\\\])*"(?!")', flags=re.U)
STRING_LITERAL2.setParseAction(
    lambda x: rdflib.Literal(decodeUnicodeEscape(x[0][1:-1])))

# [161] NIL ::= '(' WS* ')'
NIL = Literal('(') + ')'
NIL.setParseAction(lambda x: rdflib.RDF.nil)

# [162] WS ::= #x20 | #x9 | #xD | #xA
# Not needed?
# WS = #x20 | #x9 | #xD | #xA

# [163] ANON ::= '[' WS* ']'
ANON = Literal('[') + ']'
ANON.setParseAction(lambda x: rdflib.BNode())

# A = CaseSensitiveKeyword('a')
# The bare keyword "a" abbreviates rdf:type.
A = Literal('a')
A.setParseAction(lambda x: rdflib.RDF.type)

# ------ NON-TERMINALS --------------
def parse_imp (input):
    """Parse *input* into the abstract representation; returns a dict whose
    "result" key is one of "quit", "abstract", "declaration", "statement"."""
    # parse a string into an element of the abstract representation

    # Grammar:
    #
    # <expr> ::= <integer>
    #            true
    #            false
    #            <identifier>
    #            ( if <expr> <expr> <expr> )
    #            ( function ( <name ... ) <expr> )
    #            ( <expr> <expr> ... )
    #
    # <decl> ::= var name = expr ;
    #
    # <stmt> ::= if <expr> <stmt> else <stmt>
    #            while <expr> <stmt>
    #            name <- <expr> ;
    #            print <expr> ;
    #            <block>
    #
    # <block> ::= { <decl> ... <stmt> ... }
    #
    # <toplevel> ::= <decl>
    #                <stmt>
    #

    idChars = alphas+"_+*-?!=<>"

    pIDENTIFIER = Word(idChars, idChars+"0123456789")
    #### NOTE THE DIFFERENCE
    # Identifiers dereference their ref cell when evaluated.
    pIDENTIFIER.setParseAction(lambda result: EPrimCall(oper_deref,[EId(result[0])]))

    # A name is like an identifier but it does not return an EId...
    pNAME = Word(idChars,idChars+"0123456789")

    pNAMES = ZeroOrMore(pNAME)
    pNAMES.setParseAction(lambda result: [result])

    pINTEGER = Word("0123456789")
    pINTEGER.setParseAction(lambda result: EValue(VInteger(int(result[0]))))

    pBOOLEAN = Keyword("true") | Keyword("false")
    pBOOLEAN.setParseAction(lambda result: EValue(VBoolean(result[0]=="true")))

    # Strings: words separated by spaces; #" escapes a literal quote.
    ESC_QUOTE = Literal("#\"")
    pSTRING = "\"" + ZeroOrMore(Combine(Word(idChars+"0123456789'") | ESC_QUOTE)) + "\""
    pSTRING.setParseAction(lambda result: EValue(VString(" ".join(result[1:-1]).replace("#\"", "\""))))

    pEXPR = Forward()

    pEXPRS = ZeroOrMore(pEXPR)
    pEXPRS.setParseAction(lambda result: [result])

    pIF = "(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")"
    pIF.setParseAction(lambda result: EIf(result[2],result[3],result[4]))

    pARRAY = "(" + Keyword("new-array") + pEXPR + ")"
    pARRAY.setParseAction(lambda result: EArray(result[2]))

    def mkFunBody (params,body):
        # Wrap the body so every parameter becomes a mutable ref cell.
        bindings = [ (p,ERefCell(EId(p))) for p in params ]
        return ELet(bindings,body)

    pFUN = "(" + Keyword("function") + "(" + pNAMES + ")" + pEXPR + ")"
    pFUN.setParseAction(lambda result: EFunction(result[3],mkFunBody(result[3],result[5])))

    def printRes(result):
        # print "GOT: ", result[2].__str__(), result[3].__str__()
        return EWith(EId(result[2]), result[3])

    pWITH = "(" + Keyword("with") + pNAME + pEXPR + ")"
    pWITH.setParseAction(lambda result: printRes(result))

    pCALL = "(" + pEXPR + pEXPRS + ")"
    pCALL.setParseAction(lambda result: ECall(result[1],result[2]))

    pEXPR << (pINTEGER | pBOOLEAN | pSTRING | pIDENTIFIER | pARRAY | pIF | pFUN | pWITH | pCALL)

    pSTMT = Forward()

    pSTMT_IF_1 = "if" + pEXPR + pSTMT + "else" + pSTMT + ";"
    pSTMT_IF_1.setParseAction(lambda result: EIf(result[1],result[2],result[4]))

    pSTMT_IF_2 = "if" + pEXPR + pSTMT + ";"
    pSTMT_IF_2.setParseAction(lambda result: EIf(result[1],result[2],EValue(VBoolean(True))))

    pSTMT_WHILE = "while" + pEXPR + pSTMT + ";"
    pSTMT_WHILE.setParseAction(lambda result: EWhile(result[1],result[2]))

    pFOR_VAR = "var" + pNAME + "=" + pEXPR + ";"
    pFOR_VAR.setParseAction(lambda result: (result[1],result[3]))

    pSTMT_FOR = "for" + pFOR_VAR + pCALL + ";" + pCALL + ";" + pSTMT
    pSTMT_FOR.setParseAction(lambda result: createFor(result))

    pSTMT_PRINT = "print" + pEXPR + ";"
    pSTMT_PRINT.setParseAction(lambda result: EPrimCall(oper_print,[result[1]]));

    pSTMT_UPDATE = pNAME + "<-" + pEXPR + ";"
    pSTMT_UPDATE.setParseAction(lambda result: EPrimCall(oper_update,[EId(result[0]),result[2]]))

    pSTMT_ARR_UPDATE = pNAME + "[" + pEXPR + "]" + "<-" + pEXPR + ";"
    pSTMT_ARR_UPDATE.setParseAction(lambda result: EPrimCall(oper_update_arr, [EId(result[0]), result[2], result[5]]))

    pSTMT_PROCEDURE = pNAME + "(" + pEXPR + ZeroOrMore("," + pEXPR) + ")" + ";"
    pSTMT_PROCEDURE.setParseAction(lambda result: callProcedure(result))

    pSTMTS = ZeroOrMore(pSTMT)
    pSTMTS.setParseAction(lambda result: [result])

    #{procedure hello (1 2 3) {print 1;};}

    pDECL_VAR = "var" + pNAME + "=" + pEXPR + ";"
    pDECL_VAR.setParseAction(lambda result: (result[1],result[3]))

    pDECL_PROCEDURE = "procedure" + pNAME + "(" + pNAME + ZeroOrMore("," + pNAME) + ")" + pSTMT + ";"
    pDECL_PROCEDURE.setParseAction(lambda result: createProcedure(result))

    # hack to get pDECL to match only PDECL_VAR (but still leave room
    # to add to pDECL later)
    pDECL = ( pDECL_VAR |
              pDECL_PROCEDURE |
              NoMatch() )

    pDECLS = ZeroOrMore(pDECL)
    pDECLS.setParseAction(lambda result: [result])

    def mkBlock (decls,stmts):
        # A block becomes a let over ref cells for its declarations.
        bindings = [ (n,ERefCell(expr)) for (n,expr) in decls ]
        return ELet(bindings,EDo(stmts))

    pSTMT_BLOCK = "{" + pDECLS + pSTMTS + "}"
    pSTMT_BLOCK.setParseAction(lambda result: mkBlock(result[1],result[2]))

    pSTMT << ( pSTMT_IF_1 | pSTMT_IF_2 | pSTMT_WHILE | pSTMT_FOR | pSTMT_PRINT | pSTMT_UPDATE | pSTMT_ARR_UPDATE | pSTMT_PROCEDURE | pSTMT_BLOCK )

    # can't attach a parse action to pSTMT because of recursion, so let's duplicate the parser
    pTOP_STMT = pSTMT.copy()
    pTOP_STMT.setParseAction(lambda result: {"result":"statement",
                                             "stmt":result[0]})

    pTOP_DECL = pDECL.copy()
    pTOP_DECL.setParseAction(lambda result: {"result":"declaration",
                                             "decl":result[0]})

    pABSTRACT = "#abs" + pSTMT
    pABSTRACT.setParseAction(lambda result: {"result":"abstract",
                                             "stmt":result[1]})

    pQUIT = Keyword("#quit")
    pQUIT.setParseAction(lambda result: {"result":"quit"})

    pTOP = (pQUIT | pABSTRACT | pTOP_DECL | pTOP_STMT )

    result = pTOP.parseString(input)[0]
    return result    # the first element of the result is the expression
    '~~', endQuoteChar='~~').setParseAction(wiki_italic_parse_action)


def wiki_underline_parse_action(start, length, tokens):
    # __text__  ->  underlined HTML.
    return f'<u>{tokens[0]}</u>'


underline = QuotedString(
    '__', endQuoteChar='__').setParseAction(wiki_underline_parse_action)


def literal_parse_action(start, length, tokens):
    # Emit the escaped character itself.
    return tokens[0]


# "\x" -> "x": the backslash is dropped (nil), the next char kept verbatim.
literal = Literal('\\').setParseAction(nil_parse_action) + Char(
    printables).setParseAction(literal_parse_action)

# {divider} renders a horizontal rule.
divider = Literal('{divider}').setParseAction(
    lambda start, length, tokens: '<hr>')

# Full markup grammar; literal escape first so "\~~" etc. are not styled.
wikiMarkup = literal | link | quote | bold | italic | underline | divider


def wiki_render(s):
    # Transform wiki markup into an HTML fragment.
    return mark_safe(
        wikiMarkup.transformString(s))  # TODO: yea, this isn't safe...


@register.filter
def wiki(value):
class Tokens:
    """
    A collection of PyParsing tokens for a single line of HC(S)08-style
    assembly: mnemonics, operands, addressing-mode suffixes, and comments.
    """

    #
    # Instructions and pseudo-op definitions
    #

    #true instruction mnemonics
    mnemonic = oneOf(get_all_mnemonics(lambda cls : not issubclass(cls, Instructions.DC)), caseless=True)

    #dc pseudo-op mnemonics (dc.w/dc.b tried before bare dc)
    dc_pseudo_op = CaselessLiteral('dc.w') | CaselessLiteral('dc.b') | CaselessLiteral('dc')

    #
    # Basic assembler token definitions
    #

    hexdigits = nums + "abcdefABCDEF"

    #number, in one of the supported formats, (hex, binary, octal)
    number = Group(Optional(oneOf('$ % @')) + Word(hexdigits + '-', hexdigits))

    #numbers which can be used as an argument to BCLR and similar
    bitNumber = oneOf('0 1 2 3 4 5 6 7')

    #assembler labels; the trailing colon, if present, is dropped
    label = ~(mnemonic) + ~(dc_pseudo_op) + Word(alphas, alphanums + '_') + Optional(Literal(':')).suppress()

    #reference to a previously defined label (SP/X are reserved registers)
    reference = ~CaselessLiteral('SP') + ~CaselessLiteral('X') + Word(alphas, alphanums + '_')

    #todo: allow processing of numeric literals
    operand = number ^ reference

    #
    # Basic expressions, which can be used to evaluate things like ~RAMSTART or RAMSTART+1
    #

    #the operations allowed in an ASM expression
    allowed_opers = \
        [
            (Literal('-'), 1, opAssoc.RIGHT),   #sign
            (Literal('~'), 1, opAssoc.RIGHT),   #bitwise inversion
            (oneOf('<< >>'), 2, opAssoc.LEFT),  #bit shift operators
            (oneOf('* /'), 2, opAssoc.LEFT),    #multiplication, division, and bitwise AND
            (oneOf('| ^'), 2, opAssoc.LEFT),    #bitwise OR, bitwise XOR
            (Literal('%'), 2, opAssoc.LEFT),    #modulus operator
            (oneOf('+ -'), 2, opAssoc.LEFT)     #addition, subtraction
        ]

    #a recursive grammar which allows operations to be performed on numbers
    operand_expression = operatorPrecedence(operand, allowed_opers)

    #
    # Instruction Suffixes, which specify the acceptable arguments to the various instruction classes
    #

    #immediate addressing
    immediate_suffix = Literal('#').suppress() + operand_expression.setResultsName('immediate')

    #immediate and extended addressing
    direct_suffix = operand_expression.setResultsName('direct')

    #indexed (and indexed offset) addressing; X+ tried before X so the '+' is consumed
    indexed_suffix = Optional(operand_expression).setResultsName('index_offset') + Literal(',').suppress() + (CaselessLiteral('X+').setResultsName('index') | CaselessLiteral('X').setResultsName('index'))

    #stack offset addressing
    stack_suffix = operand_expression.setResultsName('stack_offset') + Literal(',').suppress() + CaselessLiteral('SP').suppress();

    #numbered bit suffix
    bit_suffix = bitNumber.setResultsName('bit') + Literal(',').suppress() + operand_expression.setResultsName('direct') + Literal(',').suppress() + operand_expression.setResultsName('target')

    #loop primitive / move suffixes
    branch_suffix = (indexed_suffix | immediate_suffix | stack_suffix | direct_suffix) + Literal(',').suppress() + operand_expression.setResultsName('target')

    #
    # Pseudo-Op Suffix
    #

    #constants, as allowed by the assembler: numbers, double-quoted strings, and single-quoted characters
    constant = operand_expression | Group(Literal('"') + Word(string.printable.replace('"', '')) + Literal('"').suppress()) | Group(Literal("'") + Word(string.printable.replace("'", ''), max=1) + Literal("'").suppress())

    #suffix for define constant- a comma-delimited list of constants
    dc_suffix = Group(constant + ZeroOrMore(Literal(',').suppress() + constant)).setResultsName('defined')

    #
    # Core parsing definitions
    #

    #comments start at a semicolon, and encompass the rest of the line
    comment = Literal(';') + restOfLine

    #core instruction definition
    non_dc_instruction = mnemonic.setResultsName('mnemonic') + Optional(bit_suffix | branch_suffix | indexed_suffix | immediate_suffix | stack_suffix | direct_suffix )

    #define constant definition
    dc_instruction = dc_pseudo_op.setResultsName('mnemonic') + dc_suffix

    #allow a line to contain either a normal instruction _or_ a define constant instruction
    instruction = dc_instruction | non_dc_instruction

    #a normal line of ASM, which may include labels or comments
    asm_line = (Optional(label).setResultsName('label') + Optional(instruction) + Optional(comment).suppress()) | comment.suppress()

    #definition for a whole ASM file
    asm_file = asm_line | comment.suppress()

    @classmethod
    def tokenize(cls, asm):
        """
        Breaks a single line of ASM into its component tokens, for parsing.
        Returns a dictionary of token type => token value.
        Raises InvalidSyntaxException, with the error position marked,
        when the line does not match the grammar.
        """

        #if the given line is blank, return an empty dictionary
        if not asm.strip():
            return {}

        #parse the given string using pyparsing
        try:
            return cls.asm_line.parseString(asm, parseAll=True).asDict()
        except ParseException as e:
            raise InvalidSyntaxException('A syntax error exists in your code, near: ' + e.markInputline('--!--') + '.')
    return args


from pyparsing import (
    alphas,
    oneOf,
    delimitedList,
    nums,
    Literal,
    Word,
    Combine,
    Optional,
    Suppress,
)

# ANSI escape sequence: ESC '[' <numeric params> <final letter>, e.g. "\x1b[31m".
ESC = Literal("\x1b")
integer = Word(nums)
escapeSeq = Combine(
    ESC + "[" + Optional(delimitedList(integer, ";")) + oneOf(list(alphas))
)

# Strip all ANSI escape sequences from a string.
nonAnsiString = lambda s: Suppress(escapeSeq).transformString(s)


def tof(strc):
    # Colour boolean-like strings: green for "True", red for "False",
    # anything else is passed through unchanged.
    if strc == "True":
        return msg.colr("gf", strc)
    elif strc == "False":
        return msg.colr("rf", strc)
    else:
        return strc
def check_while_true(self, code):
    """Report WHILE_TRUE when *code* contains ``while (true)`` or ``while (1)``."""
    always_true = Literal("true") | Literal("1")
    loop_header = (Literal("while") + Literal("(")
                   + always_true + Literal(")"))
    hits = loop_header.searchString(code)
    if len(hits):
        self.add_error(label="WHILE_TRUE")
def __init__(self):
    """Build the pyparsing grammar for a minimal Jinja-like template
    language: {{ expr|filters }} expressions, {% if/elif/else/endif %}
    statements with boolean tests, and {# comments #}."""
    # Packrat memoization speeds up the infixNotation grammar below.
    ParserElement.enablePackrat()

    # Delimiters for the three construct kinds.
    expression_l = Suppress("{{")
    expression_r = Suppress("}}")
    statement_l = Suppress("{%")
    statement_r = Suppress("%}")
    comment_l = Suppress("{#")
    comment_r = Suppress("#}")

    comment = comment_l + SkipTo(comment_r)("comment") + comment_r

    identifier = pyparsing_common.identifier
    # Dotted attribute path, e.g. "user.name", kept as one token.
    qualified_identifier = delimitedList(identifier, ".", combine=True)
    chain_filter = Suppress("|") + identifier

    # {{ path.to.value|filter1|filter2 }}
    expression = (
        expression_l
        + qualified_identifier("identifier")
        + chain_filter[...]("filters")
        + expression_r
    )

    statement_atom = (
        Keyword("None")
        | Keyword("False")
        | Keyword("True")
        | pyparsing_common.number
        | quotedString
        | qualified_identifier
    )
    # Longer operators first so "<=" is not read as "<" followed by "=".
    statement_op = (
        Literal("==")
        | Literal(">=")
        | Literal("<=")
        | Literal("!=")
        | Literal("<")
        | Literal(">")
        # | Literal("not") + Literal("in")
        # | Literal("in")
        # | Literal("is") + Literal("not")
        # | Literal("is")
    )
    statement_test = infixNotation(
        statement_atom,
        [
            (statement_op, 2, opAssoc.LEFT),
            ("not", 1, opAssoc.RIGHT),
            ("and", 2, opAssoc.LEFT),
            ("or", 2, opAssoc.LEFT),
        ],
    )

    if_statement = statement_l + Keyword("if") + statement_test + statement_r
    elif_statement = statement_l + Keyword("elif") + statement_test + statement_r
    else_statement = statement_l + Keyword("else") + statement_r
    endif_statement = statement_l + Keyword("endif") + statement_r

    template = (
        comment
        | if_statement
        | elif_statement
        | else_statement
        | endif_statement
        | expression
    )
    # parseWithTabs keeps tab characters intact instead of expanding them.
    self.template_parser = template.parseWithTabs()
                                isc_boolean('check_integrity')
                                + semicolon
                                )  # [ Opt View Zone ] v9.4+

# NOTE(review): this chunk starts mid-statement; the assignment being closed
# above begins earlier in the file.
optview_stmt_check_mx = (Keyword('check-mx').suppress()
                         - check_options('check_mx')
                         + semicolon
                         )  # [ Opt View Zone ] v9.4+

optview_stmt_check_mx_cname = (Keyword('check-mx-cname').suppress()
                               - check_options('check_mx_cname')
                               + semicolon
                               )  # [ Opt View Zone ] v9.4+

# check-names (master |slave| response) (warn|fail|ignore) ; [ Opt View (Zone) ]
# Zone-variant of check-names is more simplified syntax than OptView-variant
# check-names response warn;
optview_stmt_check_names = (Keyword('check-names').suppress()
                            - Group((Literal('master')('')
                                     | Literal('primary')('')
                                     | Literal('slave')('')
                                     | Literal('secondary')('')
                                     | Literal('response')(''))('zone_type')
                                    + check_options('result_status'))('')
                            + semicolon)('check_names')

optview_stmt_check_sibling = (Keyword('check-sibling').suppress()
                              + check_options('check_sibling')
                              + semicolon
                              )  # [ Opt View Zone ] v9.4+

optview_stmt_check_spf = (Keyword('check-spf').suppress()
                          - check_options('check_spf')
                          - semicolon
                          )  # [ Opt View Zone ] v9.4+
""" CSS at-rules""" from pyparsing import Literal, Combine from .identifier import identifier atkeyword = Combine(Literal("@") + identifier)
        config traffic control 49 broadcast disable multicast disable unicast disable action drop threshold 131072 countdown 0 time_interval 5
        """
        # NOTE(review): this is the tail of a method whose signature lies
        # above this chunk; ``tokens`` is presumably the tokenized config
        # line being handled — verify against the enclosing definition.
        ports = self.next_item(tokens, "control") or ""
        unicast = self.next_item(tokens, "unicast")
        multicast = self.next_item(tokens, "multicast")
        broadcast = self.next_item(tokens, "broadcast")
        # Mark each storm-control type that is enabled on every listed port.
        for p in self.iter_ports(ports):
            si = self.get_subinterface_fact(p)
            if broadcast == "enable":
                si.traffic_control_broadcast = True
            if multicast == "enable":
                si.traffic_control_multicast = True
            if unicast == "enable":
                si.traffic_control_unicast = True

    # Port expression parser
    DIGITS = Word(nums)
    # "1" or "1:2" (unit:port)
    PORT = Combine(DIGITS + Optional(Literal(":") + DIGITS))
    # 1:(2,3,10-20)
    PORT_RANGE_PT = Group(
        DIGITS + Literal(":(")
        + delimitedList(Group(DIGITS + Suppress(Literal("-")) + DIGITS) | DIGITS, delim=",")
        + Suppress(Literal(")"))
    )
    # 1:2-1:5
    PORT_RANGE = Group(PORT + Suppress(Literal("-")) + PORT)
    # Port expression
    PORT_EXPR = delimitedList(PORT_RANGE_PT | PORT_RANGE | PORT, delim=",")
class RegexParser(object):
    """pyparsing grammar for a small regular-expression language.

    Parsing a pattern yields an AST built from the node classes used in the
    parse actions below (Alphanum, Meta, Esc, Atom, Choice, ...), which are
    defined elsewhere in this module.
    """

    esc = r"(){}[]*|+^$/.?tnrf-"     # characters allowed after a backslash
    hex = r"0123456789ABCDEFabcdef"  # digits of a \xNN escape

    # defined by pyparsing
    # alphanums = abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789
    alphanum = Word(alphanums, exact=1)
    alphanum.leaveWhitespace()
    alphanum.setParseAction(lambda orig, loc, tok: Alphanum(tok[0]))

    meta = Word(r" \#!\"%§&'/,:;<=>@_-~", exact=1)
    meta.leaveWhitespace()
    meta.setParseAction(lambda orig, loc, tok: Meta(tok[0]))

    anyCharacter = Literal('.')
    anyCharacter.leaveWhitespace()
    anyCharacter.setParseAction(lambda orig, loc, tok: AnyCharacter(tok[0]))

    # "\\", "\xNN", or "\<esc char>" — '+' binds tighter than '|', as intended
    escapedCharacter = Literal('\\\\') | Literal('\\x') + Word(hex, exact=2) | Literal('\\') + Word(esc, exact=1)
    escapedCharacter.leaveWhitespace()
    escapedCharacter.setParseAction(lambda orig, loc, tok: Esc(''.join(tok)))

    character = escapedCharacter | alphanum | meta
    character.leaveWhitespace()
    character.setParseAction(lambda orig, loc, tok: Character(tok[0]))

    # [abc...] — the action strips the surrounding brackets (tok[1:-1])
    characterClass = Literal('[') + pyparsing.OneOrMore(escapedCharacter | alphanum | meta) + Literal(']')
    characterClass.leaveWhitespace()
    characterClass.setParseAction(lambda orig, loc, tok: CharacterClass(tok[1:-1]))

    expression = Forward()

    atom = anyCharacter | characterClass | character | Literal('(').suppress() + expression + Literal(')').suppress()
    atom.leaveWhitespace()
    atom.setParseAction(lambda orig, loc, tok: Atom(tok[0]))

    eventually = atom + Literal('?')
    eventually.leaveWhitespace()
    eventually.setParseAction(lambda orig, loc, tok: Eventually(tok[0]))

    iteration = atom + Literal('*')
    iteration.leaveWhitespace()
    iteration.setParseAction(lambda orig, loc, tok: Iteration(tok[0]))

    plus = atom + Literal('+')
    plus.leaveWhitespace()
    plus.setParseAction(lambda orig, loc, tok: Plus(tok[0]))

    factor = iteration | plus | eventually | atom
    factor.leaveWhitespace()
    factor.setParseAction(lambda orig, loc, tok: Factor(tok[0]))

    term = Forward()

    sequence = factor + term
    sequence.leaveWhitespace()
    sequence.setParseAction(lambda orig, loc, tok: Sequence(tok[0], tok[1]))

    term << (sequence | factor)
    term.leaveWhitespace()
    term.setParseAction(lambda orig, loc, tok: Term(tok[0]))

    choice = term + Literal('|') + expression
    choice.leaveWhitespace()
    choice.setParseAction(lambda orig, loc, tok: Choice(tok[0], tok[2]))

    # pyparsing's Empty matches "" ; the action wraps it in the AST Empty node
    empty = pyparsing.Empty()
    empty.leaveWhitespace()
    empty.setParseAction(lambda orig, loc, tok: Empty())

    expression << (choice | term | empty)
    expression.leaveWhitespace()
    expression.setParseAction(lambda orig, loc, tok: Expression(tok[0]))

    rootExpression = Optional(Literal('^')) + expression + Optional(Literal('$'))
    rootExpression.leaveWhitespace()
    rootExpression.setParseAction(_root_expression_helper)

    @classmethod
    def parse(cls, strng):
        """Parse *strng* into an AST node.

        :raises Exception: if *strng* is not a str
        :raises ParseBaseException: if the pattern does not match the grammar
        """
        if not isinstance(strng, str):
            raise Exception("Method parse() expects a string, however the argument is of type %s!" % strng.__class__.__name__)
        try:
            # temporarily limit traceback, otherwise it will flood the tests
            sys.tracebacklimit = 3
            return cls.rootExpression.parseString(strng, parseAll=True)[0]
        except ParseBaseException as exc:
            raise exc
        finally:
            # BUG FIX: the original reset tracebacklimit on a line *after*
            # ``return``, which never executed, so the limit of 3 leaked
            # out of this method; ``finally`` guarantees the reset on both
            # the success and the error path.
            sys.tracebacklimit = 1000
def __init__(self):
    """Build the arithmetic-expression grammar and operator tables.

    Grammar (note multiplication is spelled ``x``, not ``*``)::

        expop   :: '^'
        multop  :: 'x' | '/'
        addop   :: '+' | '-'
        integer :: ['+' | '-'] '0'..'9'+
        atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
        factor  :: atom [ expop factor ]*
        term    :: factor [ multop factor ]*
        expr    :: term [ addop term ]*

    Populates ``self.bnf`` (the parser), ``self.opn`` (binary operators)
    and ``self.fn`` (unary functions).  Parse actions push tokens via
    ``self.pushFirst`` / ``self.pushUMinus`` (defined on this class,
    outside this view).
    """
    point = Literal(".")
    e = CaselessLiteral("E")
    # Signed real number with optional fraction and exponent.
    fnumber = Combine(
        Word("+-" + nums, nums) +
        Optional(point + Optional(Word(nums))) +
        Optional(e + Word("+-" + nums, nums)))
    ident = Word(alphas, alphas + nums + "_$")

    plus = Literal("+")
    minus = Literal("-")
    mult = Literal("x")
    div = Literal("/")
    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    addop = plus | minus
    multop = mult | div
    expop = Literal("^")
    pi = CaselessLiteral("PI")

    expr = Forward()
    atom = ((Optional(oneOf("- +")) +
             (pi | e | fnumber | ident + lpar + expr + rpar).setParseAction(self.pushFirst))
            | Optional(oneOf("- +")) + Group(lpar + expr + rpar)
            ).setParseAction(self.pushUMinus)

    # by defining exponentiation as "atom [ ^ factor ]..." instead of
    # "atom [ ^ atom ]...", we get right-to-left exponents, instead of
    # left-to-right; that is, 2^3^2 = 2^(3^2), not (2^3)^2.
    factor = Forward()
    factor << atom + ZeroOrMore(
        (expop + factor).setParseAction(self.pushFirst))
    term = factor + ZeroOrMore(
        (multop + factor).setParseAction(self.pushFirst))
    expr << term + ZeroOrMore(
        (addop + term).setParseAction(self.pushFirst))
    self.bnf = expr

    # map operator symbols to corresponding arithmetic operations
    epsilon = 1e-12
    self.opn = {
        "+": operator.add,
        "-": operator.sub,
        "x": operator.mul,
        "/": operator.truediv,
        "^": operator.pow
    }
    self.fn = {
        "sin": math.sin,
        "cos": math.cos,
        "tan": math.tan,
        "abs": abs,
        "trunc": lambda a: int(a),
        "round": round,
        # BUGFIX: the original used the Python-2-only builtin cmp(a, 0),
        # which raises NameError on Python 3.  (a > 0) - (a < 0) yields the
        # same -1/0/+1 sign on both versions; values within epsilon of zero
        # are treated as 0, as before.
        "sgn": lambda a: (a > 0) - (a < 0) if abs(a) > epsilon else 0
    }
<base-64-char> :: <alpha> | <decimal-digit> | "+" | "/" | "=" ;
<null> :: "" ;
"""
import operator
import tqdm

from pyparsing import Suppress, Literal, Forward, Word, ParseFatalException, ZeroOrMore, Group, alphanums, Keyword

# define punctuation literals
LPAR, RPAR = map(Suppress, "()")

# extended definitions
# Atomic token: SMT-LIB-style symbols and operators (no whitespace/parens).
token = Word(alphanums + "-./_:*+=!<>")

sexp_cmd = Forward()
# Only these top-level command heads are kept; others were dropped
# (see the commented-out dropped_cmd_names variant below).
cmd_names = (Literal('declare-fun') | Literal('model-add'))
# dropped_cmd_names = Suppress(Keyword('assert') | Keyword('model-del'))
# cmd_name = (cmd_names | dropped_cmd_names)
cmd_name = cmd_names

# Generic s-expression: a token, or a parenthesised list of s-expressions.
sexp = Forward()
sexp_list = Group(LPAR + ZeroOrMore(sexp) + RPAR)
sexp << (token | sexp_list)

# Top-level command: "(<cmd-name> <sexp>...)".
sexp_cmd_list = Group(LPAR + cmd_names + ZeroOrMore(sexp) + RPAR)
# dropped_sexp_cmd_list = Suppress(Group(LPAR + dropped_cmd_names + ZeroOrMore(sexp) + RPAR))
# sexp_cmd << (token | sexp_cmd_list | dropped_sexp_cmd_list)
sexp_cmd << (token | sexp_cmd_list)

# A whole document is a sequence of top-level commands.
sexp_doc = Group(ZeroOrMore(sexp_cmd))
def parse_instruction(string, location, tokens):
    """Pyparsing parse action: build a REIL instruction from parse results.

    :param string: the full string being parsed (pyparsing convention)
    :param location: match location within *string* (pyparsing convention)
    :param tokens: parse results with named groups ``mnemonic``,
        ``fst_operand``, ``snd_operand`` and ``trd_operand``
    :return: the instruction produced by ``ReilBuilder.build``
    """
    mnemonic_str = ReilMnemonic.from_string(tokens["mnemonic"])

    # Each operand group holds a single already-converted operand object.
    oprnd1 = tokens["fst_operand"][0]
    oprnd2 = tokens["snd_operand"][0]
    oprnd3 = tokens["trd_operand"][0]

    ins_builder = ReilBuilder()

    return ins_builder.build(mnemonic_str, oprnd1, oprnd2, oprnd3)

# ============================================================================ #

# Grammar building blocks for REIL assembly text.
comma = Literal(",")

# Numbers: hexadecimal (0x-prefixed) or decimal.
hex_num = Combine("0x" + Word("0123456789abcdef"))
dec_num = Word("0123456789")

# Immediates may be negative; registers are bare alphanumeric names.
immediate = Optional("-") + Or([hex_num, dec_num])
register = Word(alphanums)

# NOTE(review): this Or(...) list is truncated at the end of this chunk;
# the remaining mnemonics continue past the visible source.
mnemonic = Or([
    # Arithmetic
    Literal("add"),
    Literal("sub"),
    Literal("mul"),
    Literal("div"),
    Literal("mod"),
    Literal("bsh"),
# Course Engenharia Biológica # Enrolled SAUM No # regex = u"Number(.+?)Name(.+?)Email(.+?)Course(.+?)Enrolled SAUM(Yes|No)" # regex = u"Número(.+?)Nome(.+?)Email(.+?)Curso(.+?)Inscrito SAUM(Sim|Não)" match = re.findall(regex, text) with open("alunos.txt", "w") as f: for m in match: f.write(u"{} {}\n".format(m[0], m[1]).encode("utf8")) from pyparsing import Word, Literal, printables, LineStart, SkipTo, Combine name = Word(printables).setResultsName("name") seq_start = Literal("5'").suppress() seq_stop = Literal("3'").suppress() sequence = Combine(seq_start + SkipTo(seq_stop)).setResultsName("seq") mwg_primer = LineStart() + name + SkipTo(LineStart()) + sequence result = mwg_primer.scanString(raw_string) seqlist = [data for data, dataStart, dataEnd in result] number += len(seqlist) fasta_string = "" for data in seqlist: number -= 1 s = data.seq.strip("-").replace("\n", "").replace(" ", "")
def parseIDL(text):
    """Parse a COM/MIDL interface-definition-language source string.

    Builds (on every call) a pyparsing grammar for the subset of IDL used
    by the target files -- typedefs/enums, interfaces and dispinterfaces,
    coclasses and libraries -- and returns the parse results.

    :param text: complete IDL source text
    :return: pyparsing ParseResults with named groups such as
        ``typedef``, ``interface``, ``coclass`` and ``library``
    """
    definitions = Forward()

    # Punctuation literals.
    # NOTE(review): the *brack/*brace names are swapped relative to the
    # usual convention (lbrack is '{', lbrace is '['); kept as-is because
    # the rest of the grammar depends on these names.
    lbrack = Literal('{')
    rbrack = Literal('}')
    lbrace = Literal('[')
    rbrace = Literal(']')
    lparen = Literal('(')
    rparen = Literal(')')
    comma = Literal(',')
    dot = Literal('.')
    semicolon = Literal(';')
    colon = Literal(':')
    equals = Literal('=')
    minus = Literal('-')
    asterisk = Literal('*')

    # Quoted strings with the quotes stripped.
    stringLiteral = quotedString
    stringLiteral.setParseAction(removeQuotes)
    identifier = Word(alphanums + "_")

    # IDL basic types
    Boolean_ = Literal("Boolean")
    byte_ = Literal("byte")
    char_ = Literal("char")
    double_ = Literal("double")
    error_status_t_ = Literal("error_status_t")
    float_ = Literal("float")
    handle_t_ = Literal("handle_t")
    hyper_ = Literal("hyper")
    int_ = Literal("int")
    __int8_ = Literal("__int8")
    __int16_ = Literal("__int16")
    __int32_ = Literal("__int32")
    __int3264_ = Literal("__int3264")
    __int64_ = Literal("__int64")
    long_ = Literal("long")
    short_ = Literal("short")
    small_ = Literal("small")
    void_ = Literal("void")
    wchar_t_ = Literal("wchar_t")
    basetype = (Boolean_ | byte_ | char_ | double_ | error_status_t_ |
                float_ | handle_t_ | hyper_ | int_ | __int8_ | __int16_ |
                __int32_ | __int3264_ | __int64_ | long_ | short_ | small_ |
                void_ | wchar_t_)

    # COM types
    type_specifier = Forward()
    struct_ = Keyword("struct")
    union_ = Keyword("union")
    enum_ = Keyword("enum")
    hresult_ = Keyword("HRESULT")
    variant_ = Keyword("VARIANT")
    variant_bool_ = Keyword("VARIANT_BOOL")
    bstr_ = Keyword("BSTR")
    safearray_ = Literal("SAFEARRAY") + lparen + type_specifier + rparen
    com_type = (hresult_ | variant_ | variant_bool_ | bstr_ | safearray_)

    # Type specifier - this can also be a user defined type
    # (e.g. ICWAccount); trailing '*' pointer markers are discarded.
    type_specifier << (basetype | com_type | struct_ | union_ | enum_ |
                       identifier) + Suppress(Optional(ZeroOrMore(asterisk)))

    # uuid(XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX) attribute.
    uuid_ = Literal("uuid")
    uuid_number = Regex(r"[A-Fa-f0-9]{8}-[A-Fa-f0-9]{4}-"
                        r"[A-Fa-f0-9]{4}-[A-Fa-f0-9]{4}-[A-Fa-f0-9]{12}")
    uuid = Suppress(uuid_ + lparen) + uuid_number("uuid") + Suppress(rparen)

    # Constants: hex, (possibly negative) decimal, or quoted string.
    integer = Combine(Optional(minus) + Word(nums))
    hex_number = Regex(r"0x[A-Fa-f0-9]+")
    constant = hex_number | integer | stringLiteral

    # version(N.N) attribute.
    version_ = Literal("version")
    version_number = (Combine(Word(nums) + dot + Word(nums)) | identifier)
    version = (Suppress(version_ + lparen) + version_number("version") +
               Suppress(rparen))

    # helpstring("...") attribute.
    helpstring_ = Literal("helpstring")
    helpstring = Combine(
        Suppress(helpstring_ + lparen) + stringLiteral + Suppress(rparen))

    # Arithmetic modifiers allowed in enum values, e.g. "FOO = 1 + 2".
    # NOTE: this local name shadows any module-level ``operator`` import.
    plus_ = Literal('+')
    minus_ = Literal('-')
    multiply_ = Literal('*')
    divide_ = Literal('/')
    operator = (plus_ | minus_ | multiply_ | divide_)
    enum_modifier = operator + integer

    # One enum member, optionally preceded by a [helpstring(...)] block
    # and optionally assigned a value.
    enum_definition = Group(
        Optional(Suppress(lbrace) + helpstring + Suppress(rbrace)) +
        identifier("name") +
        Optional(
            Suppress(equals) +
            Combine(
                (constant | identifier) +
                Optional(enum_modifier))("value")) +
        Optional(Suppress(comma)))

    typedef_ = Keyword("typedef")
    typedef_attribute_list = (Suppress(lbrace) +
                              delimitedList(identifier("value"), comma) +
                              Suppress(rbrace))

    # Technically this could be a list of declarators, but that form is
    # not used in the target IDL files.
    declarator_list = identifier  # delimitedList(identifier, comma)

    typedef_header = (Suppress(typedef_) +
                      Group(Optional(typedef_attribute_list))("attributes") +
                      type_specifier("type") +
                      declarator_list("name"))
    typedef = Group(typedef_header +
                    Suppress(lbrack) +
                    Group(ZeroOrMore(enum_definition("constant")))
                    ("constants") +
                    Suppress(rbrack) +
                    Suppress(declarator_list) +
                    Suppress(semicolon))

    # Interface/coclass attribute keywords.
    dual_ = Keyword("dual")
    object_ = Keyword("object")
    ptr_ = Keyword("ptr")
    ref_ = Keyword("ref")
    unique_ = Keyword("unique")
    nonextensible_ = Keyword("nonextensible")
    default_ = Keyword("default")
    noncreatable_ = Keyword("noncreatable")
    hidden_ = Keyword("hidden")
    pointer_default_ = Literal("pointer_default")
    pointer_default = (Suppress(pointer_default_) + Suppress(lparen) +
                       (ptr_ | ref_ | unique_) + Suppress(rparen))
    helpcontext_ = Keyword("helpcontext")
    helpcontext = helpcontext_ + lparen + integer + rparen
    id_ = Keyword("id")
    com_id = (Suppress(id_) + Suppress(lparen) + identifier("id") +
              Suppress(rparen))
    propget_ = Keyword("propget")
    propput_ = Keyword("propput")
    restricted_ = Keyword("restricted")

    # Per-function attributes, e.g. [id(1), propget, helpstring("...")].
    function_attributes = (com_id | helpcontext | propget_ | propput_ |
                           hidden_ | restricted_)("attribute")
    function_attribute = (helpstring("helpstring") | function_attributes)

    # Per-argument attributes, e.g. [in, out, retval, defaultvalue(0)].
    in_ = Keyword("in")("attribute")
    out_ = Keyword("out")("attribute")
    retval_ = Keyword("retval")("attribute")
    optional_ = Keyword("optional")("attribute")
    defaultvalue_ = Literal("defaultvalue")
    defaultvalue = (Suppress(defaultvalue_) + Suppress(lparen) +
                    (constant | identifier) + Suppress(rparen))("defaultvalue")
    arg_attributes = (in_ | out_ | retval_ | optional_)("attribute")
    arg_attribute = (arg_attributes | defaultvalue)
    arg_opts = (Suppress(lbrace) + delimitedList(arg_attribute, comma) +
                Suppress(rbrace))

    function_arg = Group(
        Group(Optional(arg_opts))("attributes") +
        # Some functions have the arg_opts twice
        Suppress(Optional(arg_opts)) +
        type_specifier("type") +
        Optional(ZeroOrMore(asterisk) + identifier("name")))("parameter")
    function_args = (Suppress(lparen) +
                     Optional(delimitedList(function_arg, comma)) +
                     Suppress(rparen))
    function_opts = (Suppress(lbrace) +
                     delimitedList(function_attribute, comma) +
                     Suppress(rbrace))
    function = Group(
        Group(Optional(function_opts))("attributes") +
        type_specifier("retval") +
        identifier("name") +
        Group(Optional(function_args))("parameters") +
        Suppress(semicolon))("function")
    functions = ZeroOrMore(function)

    source_ = Keyword("source")
    oleautomation_ = Keyword("oleautomation")
    appobject_ = Keyword("appobject")

    # interface definition (covers both interface and dispinterface)
    interface_ = Keyword("interface") | Keyword("dispinterface")
    interface_attributes = (uuid | helpcontext | version | dual_ | object_ |
                            pointer_default | nonextensible_ | default_ |
                            noncreatable_ | hidden_ | source_ |
                            oleautomation_ | appobject_)("attribute")
    interface_attribute = (helpstring("helpstring") | interface_attributes)
    interface_opts = Group(
        Suppress(lbrace) +
        ZeroOrMore(interface_attribute + Suppress(Optional(comma))) +
        Suppress(rbrace))
    interface_body = (Suppress(lbrack) +
                      Group(functions)("definitions") +
                      Suppress(rbrack))
    # dispinterfaces split the body into "properties:" and "methods:".
    dispinterface_body = (
        Suppress(lbrack) +
        Group(Suppress(Literal("properties:")) + functions)("properties") +
        Group(Suppress(Literal("methods:")) + functions)("methods") +
        Suppress(rbrack))("definitions")
    interface = Group(
        Optional(interface_opts)("attributes") +
        interface_("type") +
        identifier("name") +
        Optional(Suppress(colon) + identifier("base_class")) +
        Optional(interface_body | dispinterface_body) +
        Optional(Suppress(semicolon)))

    # COM coclass definition
    coclass_ = Keyword("coclass")
    coclass_attribute = (uuid | helpstring | noncreatable_ | hidden_ |
                         appobject_)
    coclass_opts = ZeroOrMore(coclass_attribute + Suppress(Optional(comma)))
    coclass_head = Group(Suppress(lbrace) + coclass_opts + Suppress(rbrace))
    coclass_body = (Suppress(lbrack) + definitions + Suppress(rbrack) +
                    Suppress(semicolon))
    coclass = (coclass_head("attributes") + coclass_("type") +
               identifier("name") + coclass_body)

    # COM Library definition
    library_ = Keyword("library")
    library_optional_attribute = (uuid | helpstring | version)
    library_options = Group(
        Suppress(lbrace) +
        ZeroOrMore(library_optional_attribute + Suppress(Optional(comma))) +
        Suppress(rbrace))
    library_header = (library_options("attributes") + library_("type") +
                      identifier("name"))
    library_body = (Suppress(lbrack) +
                    Group(definitions)("definitions") +
                    Suppress(rbrack))
    library = Group(library_header + library_body + Suppress(semicolon))

    # Top level: any number of library/typedef/coclass/interface definitions.
    definition = (library("library") | typedef("typedef") |
                  coclass("coclass") | interface("interface"))
    definitions << ZeroOrMore(definition)
    IDL = definitions("definitions") + StringEnd()

    # ignore comments, preprocessor directives and imports
    comment = Literal('//') + restOfLine
    ml_comment_begin = Literal("/*")
    ml_comment_end = Literal("*/")
    ml_comment = (ml_comment_begin + SkipTo(ml_comment_end) + ml_comment_end)
    import_ = Literal('import') + restOfLine
    pp_if = Literal('#if') + restOfLine
    pp_endif = Literal('#endif') + restOfLine
    pp_else = Literal('#else') + restOfLine
    # Just ignore the second half of a conditional; this is a bit hacky
    # but it should be OK for now.
    pp_conditional = pp_else + SkipTo(pp_endif) + (pp_endif)
    pp_include = Literal('#include') + restOfLine
    pp_define = Literal('#define') + restOfLine
    pp_directive = OneOrMore(pp_conditional | pp_include)
    midl_pragma_ = Keyword("midl_pragma")
    midl_pragma = midl_pragma_ + restOfLine

    IDL.ignore(import_)
    IDL.ignore(pp_define)
    IDL.ignore(comment)
    IDL.ignore(cppStyleComment)
    IDL.ignore(pp_conditional)
    IDL.ignore(pp_if)
    IDL.ignore(pp_endif)
    # IDL.ignore(pp_else)
    IDL.ignore(pp_include)
    IDL.ignore(midl_pragma)
    # IDL.enablePackrat()

    IDL("idl_file")
    tokens = IDL.parseString(text)
    # print(tokens)
    return tokens
class MySQLParser(object):
    """Parser/serializer for MySQL-style ``my.cnf`` configuration files.

    The pyparsing grammar is built once at class-definition time.  The file
    model is a sequence of ``[section]`` blocks of ``key = value`` entries,
    plus ``!includedir`` lines; ``#`` comments are ignored.
    """

    # --- grammar (class-level, shared by all instances) ---
    key = Word(alphanums + "_-")
    space = White().suppress()
    value = CharsNotIn("\n")
    filename = Literal("!includedir") + Word(alphanums + " /.")
    comment = ("#")
    # "key" alone, or "key = value", with optional surrounding spaces and
    # an optional trailing comment marker.
    config_entry = (key + Optional(space) + Optional(
        Literal("=").suppress() + Optional(space) + Optional(value) +
        Optional(space) + Optional("#")))
    single_value = key
    # A "[section]" header followed by its entries.
    client_block = Forward()
    client_block << Group(
        (Literal("[").suppress() + key + Literal("]").suppress()) +
        Group(ZeroOrMore(Group(config_entry))))
    # An "!includedir <path>" line; grouped to mirror client_block's shape.
    include_block = Forward()
    include_block << Group(Combine(filename) + Group(Group(Empty())))
    # The file consists of client_blocks and include_files
    client_file = OneOrMore(include_block | client_block).ignore(pythonStyleComment)

    file_header = """# File parsed and saved by privacyidea.\n\n"""

    def __init__(self, infile="/etc/mysql/my.cnf", content=None, opener=open):
        """Read config from *content* if given, else from file *infile*.

        :param opener: file-open callable, injectable for testing
        """
        self.file = None
        self.opener = opener
        if content:
            self.content = content
        else:
            self.file = infile
            self._read()

    def _read(self):
        """
        Reread the contents from the disk
        """
        with self.opener(self.file, 'rb') as f:
            self.content = f.read().decode('utf-8')

    def get(self):
        """
        return the grouped config
        """
        if self.file:
            self._read()
        config = self.client_file.parseString(self.content)
        return config

    def format(self, dict_config):
        '''
        :return: The formatted data as it would be written to a file
        '''
        output = ""
        output += self.file_header
        for section, attributes in dict_config.items():
            if section.startswith("!includedir"):
                output += "{0}\n".format(section)
            else:
                output += "[{0}]\n".format(section)
                # BUGFIX: was attributes.iteritems(), which is Python-2-only
                # (and inconsistent with dict_config.items() just above);
                # .items() behaves the same on both versions.
                for k, v in attributes.items():
                    if v:
                        output += "{k} = {v}\n".format(k=k, v=v)
                    else:
                        output += "{k}\n".format(k=k)
                output += "\n"
        return output

    def get_dict(self, section=None, key=None):
        '''
        return the client config as a dictionary.
        '''
        ret = {}
        config = self.get()
        for client in config:
            client_config = {}
            for attribute in client[1]:
                # Two tokens -> key/value pair; one token -> bare key.
                if len(attribute) > 1:
                    client_config[attribute[0]] = attribute[1]
                elif len(attribute) == 1:
                    client_config[attribute[0]] = None
            ret[client[0]] = client_config
        if section:
            ret = ret.get(section, {})
        if key:
            ret = ret.get(key)
        return ret

    def save(self, dict_config=None, outfile=None):
        """Format *dict_config* and write it to *outfile* (binary mode)."""
        if dict_config:
            output = self.format(dict_config)
            with self.opener(outfile, 'wb') as f:
                for line in output.splitlines():
                    # BUGFIX: was  line.encode('utf-8') + "\n"  -- mixing
                    # bytes and str raises TypeError on Python 3; the file
                    # is opened 'wb', so the newline must be bytes too.
                    f.write(line.encode('utf-8') + b"\n")
""" Parsing for CSS and CSS-style values, such as transform and filter attributes. """ from pyparsing import (Literal, Word, CaselessLiteral, Optional, Combine, Forward, ZeroOrMore, nums, oneOf, Group, delimitedList) #some shared definitions from pathdata from ..pathdata import number, maybeComma paren = Literal("(").suppress() cparen = Literal(")").suppress() def Parenthised(exp): return Group(paren + exp + cparen) skewY = Literal("skewY") + Parenthised(number) skewX = Literal("skewX") + Parenthised(number) rotate = Literal("rotate") + Parenthised( number + Optional(maybeComma + number + maybeComma + number)) scale = Literal("scale") + Parenthised(number + Optional(maybeComma + number)) translate = Literal("translate") + Parenthised(number + Optional(maybeComma + number))