def script(self):
    """Build and return the pyparsing grammar for an nginx configuration dump.

    Returns the top-level ``sub_block`` expression, which matches one or more
    statements/blocks.  Relies on module-level pyparsing imports and the
    project-local ``NginxQuotedString`` helper.
    """
    # constants
    left_bracket = Suppress("{")
    right_bracket = Suppress("}")
    semicolon = Suppress(";")
    space = White().suppress()
    keyword = Word(alphanums + ".+-_/")
    path = Word(alphanums + ".-_/")
    variable = Word("$_-" + alphanums)
    # Unquoted value: parenthesised runs, ${var} substitutions, or any
    # character that is not whitespace/;/(){}.
    value_wq = Regex(r'(?:\([^\s;]*\)|\$\{\w+\}|[^\s;(){}])+')
    value_sq = NginxQuotedString(quoteChar="'")
    value_dq = NginxQuotedString(quoteChar='"')
    value = (value_dq | value_sq | value_wq)
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    # NOTE: "~*" must be tried before "~" so the longer token wins.
    location_modifier = (Keyword("=") | Keyword("~*") | Keyword("~") |
                         Keyword("^~"))
    # modifier for if statement
    if_modifier = Combine(
        Optional("!") +
        (Keyword("=") | Keyword("~*") | Keyword("~") |
         (Literal("-") +
          (Literal("f") | Literal("d") | Literal("e") | Literal("x")))))
    condition = ((if_modifier + Optional(space) + value) |
                 (variable + Optional(space + if_modifier + Optional(space) +
                                      value)))
    # rules
    include = (Keyword("include") + space + value + semicolon)("include")
    directive = (keyword + ZeroOrMore(space + value) + semicolon)("directive")
    # Marker lines emitted by `nginx -T` style dumps.
    file_delimiter = (Suppress("# configuration file ") + path +
                      Suppress(":"))("file_delimiter")
    comment = (Suppress('#') + Regex(r".*"))("comment")
    hash_value = Group(value + ZeroOrMore(space + value) +
                       semicolon)("hash_value")

    generic_block = Forward()
    if_block = Forward()
    location_block = Forward()
    hash_block = Forward()
    unparsed_block = Forward()

    sub_block = OneOrMore(
        Group(if_block | location_block | hash_block | generic_block |
              include | directive | file_delimiter | comment |
              unparsed_block))

    if_block << (
        Keyword("if") + Suppress("(") + Group(condition) + Suppress(")") +
        Group(left_bracket + Optional(sub_block) + right_bracket))("block")
    location_block << (
        Keyword("location") +
        Group(Optional(space + location_modifier) + Optional(space) + value) +
        Group(left_bracket + Optional(sub_block) + right_bracket))("block")
    # Blocks like "map"/"geo" whose body is a list of value tuples.
    hash_block << (
        keyword + Group(OneOrMore(space + variable)) +
        Group(left_bracket + Optional(OneOrMore(hash_value)) +
              right_bracket))("block")
    generic_block << (
        keyword + Group(ZeroOrMore(space + variable)) +
        Group(left_bracket + Optional(sub_block) + right_bracket))("block")
    # Fallback: keep unrecognised brace-delimited content verbatim.
    unparsed_block << (
        keyword + Group(ZeroOrMore(space + variable)) +
        nestedExpr(opener="{", closer="}"))("unparsed_block")

    return sub_block
class DdlParse(DdlParseBase):
    """DDL parser.

    Parses a ``CREATE TABLE`` statement (with MySQL/PostgreSQL/Redshift
    extensions) into a ``DdlParseTable`` via the class-level pyparsing
    grammar ``_DDL_PARSE_EXPR``.
    """

    # Punctuation tokens, all suppressed from the parse results.
    _LPAR, _RPAR, _COMMA, _SEMICOLON, _DOT, _DOUBLEQUOTE, _BACKQUOTE, _SPACE = map(
        Suppress, "(),;.\"` ")
    # Case-insensitive SQL keywords.
    _CREATE, _TABLE, _TEMP, _CONSTRAINT, _NOT_NULL, _PRIMARY_KEY, _UNIQUE, _UNIQUE_KEY, _FOREIGN_KEY, _REFERENCES, _KEY, _CHAR_SEMANTICS, _BYTE_SEMANTICS = \
        map(CaselessKeyword, "CREATE, TABLE, TEMP, CONSTRAINT, NOT NULL, PRIMARY KEY, UNIQUE, UNIQUE KEY, FOREIGN KEY, REFERENCES, KEY, CHAR, BYTE".replace(", ", ",").split(","))
    _TYPE_UNSIGNED, _TYPE_ZEROFILL = \
        map(CaselessKeyword, "UNSIGNED, ZEROFILL".replace(", ", ",").split(","))
    _COL_ATTR_DISTKEY, _COL_ATTR_SORTKEY, _COL_ATTR_CHARACTER_SET = \
        map(CaselessKeyword, "DISTKEY, SORTKEY, CHARACTER SET".replace(", ", ",").split(","))
    _FK_MATCH = \
        CaselessKeyword("MATCH") + Word(alphanums + "_")
    _FK_ON, _FK_ON_OPT_RESTRICT, _FK_ON_OPT_CASCADE, _FK_ON_OPT_SET_NULL, _FK_ON_OPT_NO_ACTION = \
        map(CaselessKeyword, "ON, RESTRICT, CASCADE, SET NULL, NO ACTION".replace(", ", ",").split(","))
    _FK_ON_DELETE = \
        _FK_ON + CaselessKeyword("DELETE") + (_FK_ON_OPT_RESTRICT | _FK_ON_OPT_CASCADE | _FK_ON_OPT_SET_NULL | _FK_ON_OPT_NO_ACTION)
    _FK_ON_UPDATE = \
        _FK_ON + CaselessKeyword("UPDATE") + (_FK_ON_OPT_RESTRICT | _FK_ON_OPT_CASCADE | _FK_ON_OPT_SET_NULL | _FK_ON_OPT_NO_ACTION)
    # Identifiers may be quoted with backquotes (MySQL) or double quotes.
    _SUPPRESS_QUOTE = _BACKQUOTE | _DOUBLEQUOTE
    _COMMENT = Suppress("--" + Regex(r".+"))

    _CREATE_TABLE_STATEMENT = Suppress(_CREATE) + Optional(_TEMP)("temp") + Suppress(_TABLE) + Optional(Suppress(CaselessKeyword("IF NOT EXISTS"))) \
        + Optional(_SUPPRESS_QUOTE) + Optional(Word(alphanums + "_")("schema") + Optional(_SUPPRESS_QUOTE) + _DOT + Optional(_SUPPRESS_QUOTE)) + Word(alphanums + "_<>")("table") + Optional(_SUPPRESS_QUOTE) \
        + _LPAR \
        + delimitedList(
            OneOrMore(
                _COMMENT
                |
                # Ignore Index
                Suppress(_KEY + Word(alphanums + "_'`() "))
                |
                # Table-level constraint definition.
                Group(
                    Optional(Suppress(_CONSTRAINT) + Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_")("name") + Optional(_SUPPRESS_QUOTE))
                    + (
                        (
                            (_PRIMARY_KEY ^ _UNIQUE ^ _UNIQUE_KEY ^ _NOT_NULL)("type")
                            + Optional(_SUPPRESS_QUOTE) + Optional(Word(alphanums + "_"))("name") + Optional(_SUPPRESS_QUOTE)
                            + _LPAR
                            + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_") + Optional(_SUPPRESS_QUOTE)))("constraint_columns")
                            + _RPAR
                        )
                        |
                        (
                            (_FOREIGN_KEY)("type")
                            + _LPAR
                            + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_") + Optional(_SUPPRESS_QUOTE)))("constraint_columns")
                            + _RPAR
                            + Optional(
                                Suppress(_REFERENCES)
                                + Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_")("references_table") + Optional(_SUPPRESS_QUOTE)
                                + _LPAR
                                + Group(delimitedList(Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_") + Optional(_SUPPRESS_QUOTE)))("references_columns")
                                + _RPAR
                                + Optional(_FK_MATCH)("references_fk_match")  # MySQL
                                + Optional(_FK_ON_DELETE)("references_fk_on_delete")  # MySQL
                                + Optional(_FK_ON_UPDATE)("references_fk_on_update")  # MySQL
                            )
                        )
                    )
                )("constraint")
                |
                # Column definition.
                Group(
                    ((_SUPPRESS_QUOTE + Word(alphanums + " _")("name") + _SUPPRESS_QUOTE)
                     ^ (Optional(_SUPPRESS_QUOTE) + Word(alphanums + "_")("name") + Optional(_SUPPRESS_QUOTE)))
                    + Group(
                        Group(
                            Word(alphanums + "_")
                            + Optional(CaselessKeyword("WITHOUT TIME ZONE") ^ CaselessKeyword("WITH TIME ZONE") ^ CaselessKeyword("PRECISION") ^ CaselessKeyword("VARYING"))
                        )("type_name")
                        + Optional(_LPAR + Regex(r"[\d\*]+\s*,*\s*\d*")("length") + Optional(_CHAR_SEMANTICS | _BYTE_SEMANTICS)("semantics") + _RPAR)
                        + Optional(_TYPE_UNSIGNED)("unsigned")
                        + Optional(_TYPE_ZEROFILL)("zerofill")
                    )("type")
                    + Optional(Word(r"\[\]"))("array_brackets")
                    + Optional(
                        # Lookahead: stop before a trailing "--" comment.
                        Regex(r"(?!--)", re.IGNORECASE)
                        + Group(
                            # '&' (Each): the attributes below may appear in any order.
                            Optional(Regex(r"\b(?:NOT\s+)?NULL?\b", re.IGNORECASE))("null")
                            & Optional(Regex(r"\bAUTO_INCREMENT\b", re.IGNORECASE))("auto_increment")
                            & Optional(Regex(r"\b(UNIQUE|PRIMARY)(?:\s+KEY)?\b", re.IGNORECASE))("key")
                            & Optional(Regex(
                                r"\bDEFAULT\b\s+(?:((?:[A-Za-z0-9_\.\'\" -\{\}]|[^\x01-\x7E])*\:\:(?:character varying)?[A-Za-z0-9\[\]]+)|(?:\')((?:\\\'|[^\']|,)+)(?:\')|(?:\")((?:\\\"|[^\"]|,)+)(?:\")|([^,\s]+))",
                                re.IGNORECASE))("default")
                            & Optional(Regex(r"\bCOMMENT\b\s+(\'(\\\'|[^\']|,)+\'|\"(\\\"|[^\"]|,)+\"|[^,\s]+)", re.IGNORECASE))("comment")
                            & Optional(Regex(r"\bENCODE\s+[A-Za-z0-9]+\b", re.IGNORECASE))("encode")  # Redshift
                            & Optional(_COL_ATTR_DISTKEY)("distkey")  # Redshift
                            & Optional(_COL_ATTR_SORTKEY)("sortkey")  # Redshift
                            & Optional(Suppress(_COL_ATTR_CHARACTER_SET) + Word(alphanums + "_")("character_set"))  # MySQL
                        )("constraint")
                    )
                )("column")
                | _COMMENT
            )
        )("columns")

    _DDL_PARSE_EXPR = Forward()
    _DDL_PARSE_EXPR << OneOrMore(_COMMENT | _CREATE_TABLE_STATEMENT)

    def __init__(self, ddl=None, source_database=None):
        super().__init__(source_database)
        self._ddl = ddl
        self._table = DdlParseTable(source_database)

    @property
    def source_database(self):
        """
        Source database option

        :param source_database: enum DdlParse.DATABASE
        """
        return super().source_database

    @source_database.setter
    def source_database(self, source_database):
        # Invoke the base-class property setter explicitly, then keep the
        # owned table object in sync.
        super(self.__class__, self.__class__).source_database.__set__(self, source_database)
        self._table.source_database = source_database

    @property
    def ddl(self):
        """DDL script"""
        return self._ddl

    @ddl.setter
    def ddl(self, ddl):
        self._ddl = ddl

    def parse(self, ddl=None, source_database=None):
        """
        Parse DDL script.

        :param ddl: DDL script
        :return: DdlParseTable, Parsed table define info.
        :raises ValueError: if no DDL was supplied here or at construction.
        """
        if ddl is not None:
            self._ddl = ddl

        if source_database is not None:
            self.source_database = source_database

        if self._ddl is None:
            raise ValueError("DDL is not specified")

        ret = self._DDL_PARSE_EXPR.parseString(self._ddl)
        # print(ret.dump())

        if "schema" in ret:
            self._table.schema = ret["schema"]
        self._table.name = ret["table"]
        self._table.is_temp = True if "temp" in ret else False

        for ret_col in ret["columns"]:
            if ret_col.getName() == "column":
                # add column
                col = self._table.columns.append(
                    column_name=ret_col["name"],
                    data_type_array=ret_col["type"],
                    array_brackets=ret_col['array_brackets'] if "array_brackets" in ret_col else None,
                    constraint=ret_col['constraint'] if "constraint" in ret_col else None)
            elif ret_col.getName() == "constraint":
                # set column constraint
                # Back-patch constraint flags onto the already-parsed columns.
                for col_name in ret_col["constraint_columns"]:
                    col = self._table.columns[col_name]
                    if ret_col["type"] == "PRIMARY KEY":
                        col.not_null = True
                        col.primary_key = True
                    elif ret_col["type"] in ["UNIQUE", "UNIQUE KEY"]:
                        col.unique = True
                    elif ret_col["type"] == "NOT NULL":
                        col.not_null = True

        return self._table
def value_list_action(*args):
    # Debug helper: dump the parse-action arguments and their types.
    # NOTE(review): Python 2 print statement — this fragment predates Python 3
    # and will not compile under Python 3 as written.
    print 'v:', args, [type(i) for i in args]


# Module-level grammar for a small query language (domain binop value-list).
integer_value = Regex(r'[-]?\d+').setParseAction(NumericIntegerAction)
float_value = Regex(r'[-]?\d+(\.\d*)?([eE]\d+)?').setParseAction(
    NumericFloatAction)
value_chars = Word(alphas + alphas8bit, alphanums + alphas8bit + '%.-_*;:')
string_value = (
    value_chars
    | quotedString.setParseAction(removeQuotes)).setParseAction(StringAction)

# value can contain any string once it's quoted
value = string_value | integer_value | float_value
value_list = (string_value
              ^ delimitedList(string_value)
              ^ OneOrMore(string_value))

binop = oneOf('= == != <> < <= > >= not like contains has ilike '
              'icontains ihas is').setName('binop')
domain = Word(alphas, alphanums).setName('domain')
domain_values = Group(value_list.copy())
# "domain = *" matches everything; otherwise compare against a value list.
domain_expression = (domain + Literal('=') + Literal('*') + stringEnd) \
    | (domain + binop + domain_values + stringEnd)

AND_ = CaselessLiteral("and")
OR_ = CaselessLiteral("or")
NOT_ = CaselessLiteral("not") | Literal('!')

identifier = Group(delimitedList(Word(alphas, alphanums + '_'),
                                 '.')).setParseAction(IdentifierAction)
# NOTE(review): this chunk is truncated here — the right-hand side of
# ident_expression continues outside the visible source.
ident_expression = Group(identifier + binop +
def ssw_document_setup():
    """Construct and return the pyparsing grammar for a seesaw-circuit file.

    Statements (INPUT/OUTPUT/seesaw/conc/macros) are one per line; Python-style
    '#' comments are ignored.

    Side effect: removes '\\n' from pyparsing's default whitespace so that
    line ends delimit statements.
    """
    ws_without_newline = "".join(
        ch for ch in ParseElementEnhance.DEFAULT_WHITE_CHARS if ch != "\n")
    ParseElementEnhance.setDefaultWhitespaceChars(ws_without_newline)

    def T(x, tag):
        # Prefix every match of `x` with `tag` in its token list.
        def TPA(tag):
            return lambda s, l, t: [tag] + t.asList()
        return x.setParseAction(TPA(tag))

    identifier = Word(alphas, alphanums + "_-")
    number = Word(nums, nums)
    num_flt = Combine(number + Optional(Literal('.') + number))
    num_sci = Combine(number + Optional(Literal('.') + number) + Literal('e') +
                      Optional(Literal('-') | Literal('+')) + Word(nums))
    # Scientific notation must be tried first, otherwise the mantissa alone
    # would match as a plain float.
    gorf = num_sci | num_flt

    # Wires and gates: w[a,b], g[wire,n] / g[n,wire], th[...] thresholds.
    wire = Group('w' + Suppress('[') +
                 Group(number + Suppress(',') + (number | Literal('f'))) +
                 Suppress(']'))
    gateO = Group('g' + Suppress('[') +
                  Group(wire + Suppress(',') + number) + Suppress(']'))
    gateI = Group('g' + Suppress('[') +
                  Group(number + Suppress(',') + wire) + Suppress(']'))
    thshO = Group('th' + Suppress('[') +
                  Group(wire + Suppress(',') + number) + Suppress(']'))
    thshI = Group('th' + Suppress('[') +
                  Group(number + Suppress(',') + wire) + Suppress(']'))
    fluor = Group('Fluor' + Suppress('[') + number + Suppress(']'))

    inp = ("INPUT" + Suppress(Literal("(")) + Group(number | identifier) +
           Suppress(")") + Suppress("=") + wire)
    out = ("OUTPUT" + Suppress("(") + Group(number | identifier) +
           Suppress(")") + Suppress("=") + (fluor | wire))

    inputs = Group(Suppress("{") + delimitedList(number, ",") + Suppress("}"))
    outputs = Group(Suppress("{") + delimitedList((number | Literal('f')), ",") +
                    Suppress("}"))

    seesaw = ('seesaw' + Suppress('[') +
              Group(number + Suppress(',') + inputs + Suppress(',') + outputs) +
              Suppress(']'))

    # Concentrations: "<value> * c" with the "* c" suppressed.
    conc = gorf + Suppress(Literal('*') + Literal('c'))
    wireconc = 'conc' + Suppress('[') + wire + Suppress(',') + conc + Suppress(']')
    outpconc = ('conc' + Suppress('[') + (gateO | gateI) + Suppress(',') +
                conc + Suppress(']'))
    thshconc = ('conc' + Suppress('[') + (thshO | thshI) + Suppress(',') +
                conc + Suppress(']'))

    # MACROS:
    reporter = ('reporter' + Suppress('[') +
                Group(number + Suppress(',') + number) + Suppress(']'))
    inputfanout = ('inputfanout' + Suppress('[') +
                   Group(number + Suppress(',') + number + Suppress(',') +
                         inputs) + Suppress(']'))
    seesawOR = ('seesawOR' + Suppress('[') +
                Group(number + Suppress(',') + number + Suppress(',') +
                      inputs + Suppress(',') + inputs) + Suppress(']'))
    seesawAND = ('seesawAND' + Suppress('[') +
                 Group(number + Suppress(',') + number + Suppress(',') +
                       inputs + Suppress(',') + inputs) + Suppress(']'))
    macros = reporter | inputfanout | seesawOR | seesawAND

    stmt = (Group(inp | out | seesaw | wireconc | outpconc | thshconc | macros) +
            OneOrMore(LineEnd().suppress()))
    document = (StringStart() + ZeroOrMore(LineEnd().suppress()) +
                OneOrMore(stmt) + StringEnd())
    document.ignore(pythonStyleComment)
    return document
# [88] Path ::= PathAlternative Path << PathAlternative # [84] VerbPath ::= Path VerbPath = Path # [87] ObjectPath ::= GraphNodePath ObjectPath = GraphNodePath # [86] ObjectListPath ::= ObjectPath ( ',' ObjectPath )* ObjectListPath = ObjectPath + ZeroOrMore(',' + ObjectPath) GroupGraphPattern = Forward() # [102] Collection ::= '(' OneOrMore(GraphNode) ')' Collection = Suppress('(') + OneOrMore(GraphNode) + Suppress(')') Collection.setParseAction(expandCollection) # [103] CollectionPath ::= '(' OneOrMore(GraphNodePath) ')' CollectionPath = Suppress('(') + OneOrMore(GraphNodePath) + Suppress(')') CollectionPath.setParseAction(expandCollection) # [80] Object ::= GraphNode Object = GraphNode # [79] ObjectList ::= Object ( ',' Object )* ObjectList = Object + ZeroOrMore(',' + Object) # [83] PropertyListPathNotEmpty ::= ( VerbPath | VerbSimple ) ObjectListPath ( ';' ( ( VerbPath | VerbSimple ) ObjectList )? )* PropertyListPathNotEmpty = ( VerbPath | VerbSimple) + ObjectListPath + ZeroOrMore(';' + Optional(
class RawNginxParser(object):
    # pylint: disable=expression-not-assigned
    """A class that parses nginx configuration with pyparsing."""

    # constants
    space = Optional(White())
    nonspace = Regex(r"\S+")
    left_bracket = Literal("{").suppress()
    right_bracket = space.leaveWhitespace() + Literal("}").suppress()
    semicolon = Literal(";").suppress()
    key = Word(alphanums + "_/+-.")
    dollar_var = Combine(Literal('$') + Regex(r"[^\{\};,\s]+"))
    condition = Regex(r"\(.+\)")
    # Matches anything that is not a special character, and ${SHELL_VARS}, AND
    # any chars in single or double quotes
    # All of these COULD be upgraded to something like
    # https://stackoverflow.com/a/16130746
    dquoted = Regex(r'(\".*\")')
    squoted = Regex(r"(\'.*\')")
    nonspecial = Regex(r"[^\{\};,]")
    varsub = Regex(r"(\$\{\w+\})")
    # nonspecial nibbles one character at a time, but the other objects take
    # precedence.  We use ZeroOrMore to allow entries like "break ;" to be
    # parsed as assignments
    value = Combine(ZeroOrMore(dquoted | squoted | varsub | nonspecial))

    location = CharsNotIn("{};," + string.whitespace)
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    # "~*" is tried before "~" so the longer modifier wins.
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")

    # rules
    comment = space + Literal('#') + restOfLine()
    assignment = space + key + Optional(space + value, default=None) + semicolon
    location_statement = space + Optional(modifier) + Optional(space + location + space)
    if_statement = space + Literal("if") + space + condition + space
    charset_map_statement = space + Literal(
        "charset_map") + space + value + space + value
    map_statement = space + Literal(
        "map") + space + nonspace + space + dollar_var + space
    # This is NOT an accurate way to parse nginx map entries; it's almost
    # certainly too permissive and may be wrong in other ways, but it should
    # preserve things correctly in most or all cases.
    #
    # - I can neither prove nor disprove that it is correct wrt all escaped
    #   semicolon situations
    # Addresses https://github.com/fatiherikli/nginxparser/issues/19
    map_pattern = Regex(r'".*"') | Regex(r"'.*'") | nonspace
    map_entry = space + map_pattern + space + value + space + semicolon
    map_block = Group(
        Group(map_statement).leaveWhitespace()
        + left_bracket
        + Group(ZeroOrMore(Group(comment | map_entry))
                + space).leaveWhitespace()
        + right_bracket)

    block = Forward()

    # key could for instance be "server" or "http", or "location" (in which case
    # location_statement needs to have a non-empty location)
    block_begin = (Group(space + key + location_statement)
                   ^ Group(if_statement)
                   ^ Group(charset_map_statement)).leaveWhitespace()
    block_innards = Group(
        ZeroOrMore(Group(comment | assignment) | block | map_block)
        + space).leaveWhitespace()
    block << Group(block_begin + left_bracket + block_innards + right_bracket)

    script = OneOrMore(Group(comment | assignment)
                       ^ block
                       ^ map_block) + space + stringEnd
    # Tabs and leading whitespace are significant and must be preserved.
    script.parseWithTabs().leaveWhitespace()

    def __init__(self, source):
        # source: the raw configuration text to parse.
        self.source = source

    def parse(self):
        """Returns the parsed tree."""
        return self.script.parseString(self.source)

    def as_list(self):
        """Returns the parsed tree as a list."""
        return self.parse().asList()
def bnf(css_style_obj):
    """Build (once) and return the pyparsing grammar for a CSS subset.

    The grammar is cached in the module-level ``BNF`` global; parse actions
    push recognised tokens into ``css_style_obj``
    (``push_param_name`` / ``push_value`` / ``push_parameter`` /
    ``push_ident_list`` / ``push_param_block``).

    BNF:

    * decimal_digit   :: '0' .. '9'
    * sign            :: '-' | '+'
    * integer         :: decimal_digit+
    * float           :: [ sign ] integer '.' [ integer ] [ 'e' | 'E' [ sign ] integer ]
    * lower_case      :: 'a' .. 'z'
    * upper_case      :: 'A' .. 'Z'
    * alpha           :: lower_case | upper_case
    * punctuation     :: '`' | '~' | '!' | '@' | '#' | '$' | '%' | '^' | '&' |
                         '*' | '(' | ')' | '_' | '=' | '+' | ';' | ':' | '\\'' |
                         ',' | '<' | '.' | '>' | '/' | '?' | ' ' | '-'
    * string_delim    :: '"' | '\\''
    * string          :: string_delim [ alpha | decimal_digit | punctuation ]* string_delim
    * identifier      :: '_' | alpha [ alpha | decimal_digit | '_' ]*
    * attr_selector   :: '[' + identifier [ [ '~' | '*' | '^' | '$' | '|' ] '=' string ] ']'
    * class_or_id     :: ( '#' | '.' ) identifier
    * pseudo_class    :: ':' alpha [ alpha | '-' ]* [ '(' integer | identifier ')' ]
    * selector        :: identifier [ class_or_id | attr_selector ] [ pseudo_class ]
                         [ identifier [ pseudo_class ] ]
    * parameter_name  :: alpha [ alpha | decimal_digit | '_' | '-' ]*
    * lower_hex       :: 'a' .. 'f'
    * upper_hex       :: 'A' .. 'F'
    * hex_digit       :: decimal_digit | lower_hex | upper_hex
    * color           :: '#' hex_digit * 6
    * comment         :: '/' '*' .*? '*' '/'
    * url             :: 'url' '(' string ')'
    * pixel_count     :: integer 'px'
    * percentage      :: integer '%'
    * parameter_val   :: url | color | pixel_count | percentage |
                         parameter_name | float | integer
    * parameter       :: parameter_name ':' [ comment* ]* parameter_val
                         [ parameter_val | comment* ]+ ';'
    * parameter_block :: selector [ ',' selector ]* '{' ( parameter | comment* )+ '}'
    """
    global BNF
    if BNF is None:
        # FIX: the fractional/exponent groups previously used "(:?" — an
        # optional literal colon — instead of the intended non-capturing
        # group "(?:", which wrongly accepted inputs like "1:.5".
        fnumber = Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?")
        identifier = Word("_"+alphas+nums)
        tilde = Literal("~")
        asterisk = Literal("*")
        caret = Literal("^")
        dsign = Literal("$")
        pipe = Literal("|")
        equal = Literal("=")
        squote = Literal("'")
        sqstring = squote + Regex(r"[^']+") + squote
        dquote = Literal('"')
        dqstring = dquote + Regex(r"[^\"]+") + dquote
        string = sqstring | dqstring
        class_or_id = Word("#"+".", "_"+alphas+nums)
        # ':' pseudo-class, optionally with an (integer|identifier) argument.
        pclass = Combine(Word(":", "-"+alphas) + Optional(
            '(' + (Word(nums) | identifier) + ')'))
        # "[attr]" or "[attr<op>=string]" attribute selector.
        attr_selector = Combine("[" + identifier + Optional(Optional(
            tilde | asterisk | caret | dsign | pipe) + equal + string) + "]")
        selector = Combine(Word("_"+alphas, "_"+alphas+nums) + Optional(
            attr_selector | class_or_id) + Optional(pclass)) | Combine(
            class_or_id + Optional(pclass)) | attr_selector
        integer = Word(nums)
        parameter_name = Word(alphas, alphas + nums + "_-")
        param_str = Word(alphas, alphas + nums + "_-")
        comment = Regex(r"[/][*].*?[*][/]", flags=re.S)
        lbrack = Literal("{")
        rbrack = Literal("}")
        px_suffix = Literal("px")
        pix_count = Combine(Word(nums) + px_suffix)
        percent = Literal("%")
        percentage = Combine(Word(nums) + percent)
        # '#' followed by exactly six hex digits.
        color = Word("#", hexnums, exact=7)
        urlstr = Keyword("url")
        url = urlstr + '(' + string + ')'
        # Order matters: more specific alternatives (url, color, px, %)
        # are tried before bare words and numbers.
        parameter_val = url | color | pix_count | percentage | param_str | fnumber | integer
        parameter = (parameter_name.setParseAction(
            css_style_obj.push_param_name) + ':' + ZeroOrMore(
            comment.suppress()) + OneOrMore(
            parameter_val.setParseAction(css_style_obj.push_value) +
            ZeroOrMore(comment.suppress())) +
            ';').setParseAction(css_style_obj.push_parameter)
        parameter_block = (delimitedList(selector).setParseAction(
            css_style_obj.push_ident_list) + lbrack + OneOrMore(
            comment.suppress() | parameter) + rbrack).setParseAction(
            css_style_obj.push_param_block)
        BNF = OneOrMore(comment.suppress() | parameter_block)
    return BNF
class PyparsingRule(UnitRule):
    """Validation rule driven by a small '#'-prefixed expression language.

    The rule string (e.g. ``#int(1,10)``, ``#or(#float(,1), #string)``) is
    parsed with the class-level grammar into a nested token tree, which
    ``_generate`` then compiles into a pyparsing pattern used by ``check``.

    NOTE(review): this class is Python 2 code (``xrange``,
    ``raise Exception, msg``) and will not compile under Python 3 as written.
    """

    # item format
    NUMS = Word(nums)
    INTEGER = Combine(Optional(oneOf("+ -")) + NUMS)
    FLOAT = Combine(INTEGER + Optional("." + Optional(NUMS)) +
                    Optional(oneOf("E e") + INTEGER))
    STRING = QuotedString('"', '\\') | QuotedString("'", "\\")
    IDENT = Word(alphas + "_", alphanums + "_")

    # item format input
    INPUT_EXPRESSION = (FLOAT | STRING | IDENT)
    # (sic) "DATETIEM" — digits plus "-: " separators.
    INPUT_DATETIEM = Word(nums, nums + "-: ")
    # Comma-separated ID lists, optionally parenthesised.
    INPUT_ID_REFS = (Suppress("(") + OneOrMore(FLOAT + Suppress(",")) + Suppress(")")) | \
                    (Suppress("(") + OneOrMore(STRING + Suppress(",")) + Suppress(")")) | \
                    OneOrMore(FLOAT + Suppress(",")) | OneOrMore(STRING + Suppress(",")) | \
                    INPUT_EXPRESSION

    # factor format
    FACTOR_INT = Group(
        Literal("#int") + Suppress("(") + Optional(INTEGER) + Literal(",") +
        Optional(INTEGER) + Suppress(")"))
    FACTOR_FLOAT = Group(
        Literal("#float") + Suppress("(") + Optional(FLOAT) + Literal(",") +
        Optional(FLOAT) + Suppress(")"))
    FACTOR_IREF = Group(
        Literal("#iref") + Suppress("(") + IDENT + Suppress(")"))
    FACTOR_EREF = Group(
        Literal("#eref") + Suppress("(") + CharsNotIn(",") + Suppress(",") +
        IDENT + Suppress(")"))
    FACTOR_STRING = Group(Literal("#string"))
    FACTOR_FIX_TYPE = Group(
        Literal("#stime") | Literal("#date") | Literal("#ltime") |
        Literal("#week") | Literal("#crontab"))
    FACTOR_REGULAR_EXP = Group(
        Literal('#reg') + Suppress('(') + quotedString + Suppress(')'))

    # syntax items
    EXP, REP, OR_PART, OR, AND_PART, AND, NOT, TERM, FACTOR, STR, = (
        Forward() for i in xrange(10))

    # syntax tree
    FACTOR << (FACTOR_INT | FACTOR_FLOAT | FACTOR_IREF | FACTOR_EREF |
               FACTOR_STRING | FACTOR_FIX_TYPE | FACTOR_REGULAR_EXP)
    REP << Group(
        Literal("#rep") + Suppress("(") + INTEGER + Suppress(",") + INTEGER +
        Suppress(",") + EXP + Suppress(")"))
    OR_PART << Suppress(",") + EXP
    OR << Group(
        Literal("#or") + Suppress("(") + EXP + ZeroOrMore(OR_PART) +
        Suppress(")"))
    AND_PART << Suppress(",") + EXP
    AND << Group(
        Literal("#and") + Suppress("(") + EXP + ZeroOrMore(AND_PART) +
        Suppress(")"))
    NOT << Group(Literal("#not") + Suppress("(") + EXP + Suppress(")"))
    TERM << (REP | OR | AND | NOT | FACTOR)
    EXP << Group((TERM | STRING) + ZeroOrMore(EXP))

    def __init__(self, reg):
        # reg: the rule expression string; compiled once into self._pattern.
        super(PyparsingRule, self).__init__()
        self.reg = reg
        tree = PyparsingRule.EXP.parseString(self.reg, False).asList()
        self._pattern = self._generate(tree)

    def check(self, iData, iFieldName):
        """Validate column iFieldName of every row in iData against the rule.

        Empty values are skipped.  Returns True when all values matched;
        failures are logged (messages are intentionally in Chinese).
        """
        isSuccess = True
        errors = []
        for item in iData:
            if len(item[iFieldName]) == 0:
                continue
            try:
                self._pattern.parseString(item[iFieldName], True)
            except ParseException as e:
                logger.error('匹配规则 %s 异常: %s' % (self.reg, str(e)))
                isSuccess = False
                errors.append(item[iFieldName])
        if not isSuccess:
            logger.error('不符合规则 %s 的内容 %s 出现在\'%s\'列' %
                         (self.reg, Util.value2pystr(errors), iFieldName))
        return isSuccess

    def _generate(self, tree):
        """Recursively compile a parsed token tree into a pyparsing pattern.

        Operator tokens ('#rep', '#or', ...) must be the first element of
        their subtree (hence the ``i != 0`` checks) and consume the whole
        subtree (hence the ``break``s).
        """
        pattern = Empty()
        i = 0
        while i < len(tree):
            if isinstance(tree[i], list):
                # Nested subtree: compile and append.
                p1 = self._generate(tree[i])
                pattern += p1
            elif tree[i] == "#rep":
                if i != 0 or len(tree) < 4:
                    raise ParseException, "--> 策划检查表达式有错误,可疑处为#rep表达式"
                p1 = self._generate(tree[3])
                try:
                    # pyparsing repetition: between tree[1] and tree[2] times.
                    p1 = p1 * (int(tree[1]), int(tree[2]))
                except ValueError:
                    raise ParseException, "--> 策划检查表达式有错误,可疑处为#rep中循环数不为整数"
                pattern += p1
                break
            elif tree[i] == "#or":
                if i != 0 or len(tree) < 2:
                    raise ParseException, "--> 策划检查表达式有错误,可疑处为#or表达式"
                p1 = self._generate(tree[1])
                for j in xrange(2, len(tree)):
                    p2 = self._generate(tree[j])
                    p1 = p1 | p2
                pattern += p1
                break
            elif tree[i] == "#and":
                if i != 0 or len(tree) < 2:
                    raise ParseException, "--> 策划检查表达式有错误,可疑处为#and表达式"
                parseList = [self._generate(tree[1])]
                for j in xrange(2, len(tree)):
                    p2 = self._generate(tree[j])
                    parseList.append(p2)
                p1 = (Empty() + parseList[0]).setParseAction(
                    _get_check_and(parseList))
                pattern += p1
                break
            elif tree[i] == "#not":
                if i != 0 or len(tree) < 2:
                    raise ParseException, "--> 策划检查表达式有错误,可疑处为#not表达式"
                p1 = self._generate(tree[1])
                p1 = (Empty() + PyparsingRule.INPUT_EXPRESSION).setParseAction(
                    _get_check_not(p1))
                pattern += p1
                break
            elif tree[i] == "#int":
                if i != 0 or len(tree) < 2:
                    raise ParseException, "--> 策划检查表达式有错误,可疑处为#int表达式"
                try:
                    if len(tree) == 2:
                        # No bounds given: any integer.
                        p1 = PyparsingRule.INTEGER
                    elif len(tree) == 3:
                        if tree[1] == ',':
                            # Only an upper bound was supplied.
                            p1 = (Empty() + PyparsingRule.INTEGER).setParseAction(
                                _get_check_num_int(None, int(tree[2])))
                        else:
                            # Only a lower bound was supplied.
                            p1 = (Empty() + PyparsingRule.INTEGER).setParseAction(
                                _get_check_num_int(int(tree[1]), None))
                    else:
                        p1 = (Empty() + PyparsingRule.INTEGER).setParseAction(
                            _get_check_num_int(int(tree[1]), int(tree[3])))
                except ValueError:
                    raise ParseException, "--> 策划检查表达式有错误,可疑处为#int中上下限不为整数"
                pattern += p1
                break
            elif tree[i] == "#float":
                if i != 0 or len(tree) < 2:
                    raise ParseException, "--> 策划检查表达式有错误,可疑处为#float表达式"
                try:
                    if len(tree) == 2:
                        # NOTE(review): uses INTEGER here, unlike the FLOAT
                        # used in the bounded branches — looks like a
                        # copy-paste slip; confirm intended behavior.
                        p1 = PyparsingRule.INTEGER
                    elif len(tree) == 3:
                        if tree[1] == ',':
                            p1 = (Empty() + PyparsingRule.FLOAT).setParseAction(
                                _get_check_num_float(None, int(tree[2])))
                        else:
                            p1 = (Empty() + PyparsingRule.FLOAT).setParseAction(
                                _get_check_num_float(int(tree[1]), None))
                    else:
                        p1 = (Empty() + PyparsingRule.FLOAT).setParseAction(
                            _get_check_num_float(float(tree[1]), float(tree[3])))
                except ValueError:
                    raise ParseException, "--> 策划检查表达式有错误,可疑处为#float中上下限不为浮点数"
                pattern += p1
                break
            elif tree[i] == "#iref":
                if i != 0 or len(tree) < 2:
                    raise ParseException, "--> 策划检查表达式有错误,可疑处为#iref表达式"
                checkRule = IdRefRule(None, tree[1])
                p1 = (Empty() + PyparsingRule.INPUT_ID_REFS).setParseAction(
                    _get_check_ID(checkRule))
                pattern += p1
                break
            elif tree[i] == "#eref":
                if i != 0 or len(tree) < 3:
                    raise ParseException, "--> 策划检查表达式有错误,可疑处为#eref表达式"
                checkRule = IdRefRule(tree[1], tree[2])
                p1 = (Empty() + PyparsingRule.INPUT_ID_REFS).setParseAction(
                    _get_check_ID(checkRule))
                pattern += p1
                break
            elif tree[i] == "#string":
                pattern += PyparsingRule.STRING
            elif tree[i] == "#stime":
                p1 = (Empty() + PyparsingRule.INPUT_DATETIEM).setParseAction(
                    BASE_TYPE_CHECK[tree[i]]())
                pattern += p1
            elif tree[i] == "#date":
                p1 = (Empty() + PyparsingRule.INPUT_DATETIEM).setParseAction(
                    BASE_TYPE_CHECK[tree[i]]())
                pattern += p1
            elif tree[i] == "#ltime":
                p1 = (Empty() + PyparsingRule.INPUT_DATETIEM).setParseAction(
                    BASE_TYPE_CHECK[tree[i]]())
                pattern += p1
            elif tree[i] == "#week":
                p1 = (Empty() + PyparsingRule.INPUT_DATETIEM).setParseAction(
                    BASE_TYPE_CHECK[tree[i]]())
                pattern += p1
            elif tree[i] == '#reg':
                if i != 0 or len(tree) < 2:
                    raise ParseException, "--> 策划检查表达式有错误,可疑处为#reg表达式"
                # Strip the surrounding quotes from the quoted regex literal.
                pattern += Regex(tree[1][1:-1])
                break
            elif type(tree[i]) == type(""):
                # Bare string token: match it literally.
                pattern += tree[i]
            i = i + 1
        return pattern
def grammar():
    """Build the pyparsing grammar for a SQL dump.

    Recognises CREATE TABLE statements, ALTER TABLE ... ADD CONSTRAINT
    FOREIGN KEY statements, '--' comments, and any other ';'-terminated
    statement, dispatching each to its project-level parse action.
    """
    # A balanced-parentheses run, collapsed to a single string token.
    paren_expr = Forward()
    paren_expr <<= "(" + ZeroOrMore(CharsNotIn("()") | paren_expr) + ")"
    paren_expr.setParseAction(join_string_act)

    # A single-quoted string, collapsed to one token.
    sq_string = "'" + OneOrMore(CharsNotIn("'")) + "'"
    sq_string.setParseAction(join_string_act)

    # DEFAULT '<quoted>' plus the rest of the column definition.
    quoted_default = (CaselessLiteral("DEFAULT")
                      + sq_string
                      + OneOrMore(CharsNotIn(", \n\t")))
    quoted_default.setParseAction(quoted_default_value_act)

    # One column/constraint entry inside CREATE TABLE ( ... ).
    field = OneOrMore(quoted_default
                      | Word(alphanums + "_\"'`:-/[].")
                      | paren_expr)
    field.setParseAction(field_act)

    table_name = (Word(alphanums + "`_.") | QuotedString("\""))

    field_list = field + ZeroOrMore(Suppress(",") + field)
    field_list.setParseAction(field_list_act)

    create_table = (CaselessLiteral("CREATE")
                    + Optional(CaselessLiteral("UNLOGGED"))
                    + CaselessLiteral("TABLE")
                    + table_name.setResultsName("tableName")
                    + "("
                    + field_list.setResultsName("fields")
                    + ")"
                    + ";")
    create_table.setParseAction(create_table_act)

    # Referential action used by ON DELETE / ON UPDATE clauses.
    fk_action = (CaselessLiteral("CASCADE")
                 | CaselessLiteral("RESTRICT")
                 | CaselessLiteral("NO ACTION")
                 | (CaselessLiteral("SET")
                    + (CaselessLiteral("NULL") | CaselessLiteral("DEFAULT"))))

    fk_columns = (Word(alphanums + "._")
                  + ZeroOrMore(Suppress(",") + Word(alphanums + "._")))

    add_fkey = (CaselessLiteral("ALTER")
                + CaselessLiteral("TABLE")
                + CaselessLiteral("ONLY")
                + table_name.setResultsName("tableName")
                + CaselessLiteral("ADD")
                + CaselessLiteral("CONSTRAINT")
                + Word(alphanums + "_")
                + CaselessLiteral("FOREIGN")
                + CaselessLiteral("KEY")
                + "(" + fk_columns.setResultsName("keyName") + ")"
                + "REFERENCES"
                + Word(alphanums + "._").setResultsName("fkTable")
                + "(" + fk_columns.setResultsName("fkCol") + ")"
                + Optional(CaselessLiteral("DEFERRABLE"))
                + Optional(CaselessLiteral("ON") + "UPDATE" + fk_action)
                + Optional(CaselessLiteral("ON") + "DELETE" + fk_action)
                + ";")
    add_fkey.setParseAction(add_fkey_act)

    # Anything else up to its terminating ';'.
    other_statement = OneOrMore(CharsNotIn(";")) + ";"
    other_statement.setParseAction(other_statement_act)

    # '--' line comments are handled like other statements.
    comment = "--" + ZeroOrMore(CharsNotIn("\n"))
    comment.setParseAction(other_statement_act)

    return OneOrMore(comment
                     | create_table
                     | add_fkey
                     | other_statement)
def __init__(self, fragment_file, sdkconfig):
    """Parse a linker-fragment file into ``self.fragments``.

    :param fragment_file: path string or an already-open file object
    :param sdkconfig: object whose ``evaluate_expression`` decides which
        conditional (if/elif/else) branches are expanded
    """
    try:
        fragment_file = open(fragment_file, 'r')
    except TypeError:
        # Already a file-like object; use it as-is.
        pass

    path = os.path.realpath(fragment_file.name)

    indent_stack = [1]

    class parse_ctx:
        # Mutable parse state shared by the parse actions below.
        fragment = None  # current fragment
        key = ''  # current key
        keys = list()  # list of keys parsed
        key_grammar = None  # current key grammar

        @staticmethod
        def reset():
            # Fix: the original assigned ``parse_ctx.fragment_instance``,
            # leaving the actually-used ``fragment`` attribute stale.
            parse_ctx.fragment = None
            parse_ctx.key = ''
            parse_ctx.keys = list()
            parse_ctx.key_grammar = None

    def fragment_type_parse_action(toks):
        parse_ctx.reset()
        parse_ctx.fragment = FRAGMENT_TYPES[toks[0]]()  # create instance of the fragment
        return None

    def expand_conditionals(toks, stmts):
        # Recursively flatten parse results, keeping only the branches
        # whose conditions evaluate true under the given sdkconfig.
        try:
            stmt = toks['value']
            stmts.append(stmt)
        except KeyError:
            try:
                conditions = toks['conditional']
                for condition in conditions:
                    try:
                        _toks = condition[1]
                        _cond = condition[0]
                        if sdkconfig.evaluate_expression(_cond):
                            expand_conditionals(_toks, stmts)
                            break
                    except IndexError:
                        # 'else' branch: no condition, just a body.
                        expand_conditionals(condition[0], stmts)
            except KeyError:
                for tok in toks:
                    expand_conditionals(tok, stmts)

    def key_body_parsed(pstr, loc, toks):
        stmts = list()
        expand_conditionals(toks, stmts)

        # Enforce the per-key min/max value counts from the key grammar.
        if parse_ctx.key_grammar.min and len(stmts) < parse_ctx.key_grammar.min:
            raise ParseFatalException(
                pstr, loc,
                "fragment requires at least %d values for key '%s'" %
                (parse_ctx.key_grammar.min, parse_ctx.key))

        if parse_ctx.key_grammar.max and len(stmts) > parse_ctx.key_grammar.max:
            raise ParseFatalException(
                pstr, loc,
                "fragment requires at most %d values for key '%s'" %
                (parse_ctx.key_grammar.max, parse_ctx.key))

        try:
            parse_ctx.fragment.set_key_value(parse_ctx.key, stmts)
        except Exception as e:
            raise ParseFatalException(
                pstr, loc,
                "unable to add key '%s'; %s" % (parse_ctx.key, str(e)))
        return None

    key = Word(alphanums + '_') + Suppress(':')
    key_stmt = Forward()

    condition_block = indentedBlock(key_stmt, indent_stack)
    key_stmts = OneOrMore(condition_block)
    key_body = Suppress(key) + key_stmts
    key_body.setParseAction(key_body_parsed)

    condition = originalTextFor(
        SDKConfig.get_expression_grammar()).setResultsName('condition')
    if_condition = Group(
        Suppress('if') + condition + Suppress(':') + condition_block)
    elif_condition = Group(
        Suppress('elif') + condition + Suppress(':') + condition_block)
    else_condition = Group(
        Suppress('else') + Suppress(':') + condition_block)
    conditional = (if_condition + Optional(OneOrMore(elif_condition)) +
                   Optional(else_condition)).setResultsName('conditional')

    def key_parse_action(pstr, loc, toks):
        key = toks[0]

        if key in parse_ctx.keys:
            # Fix: the original reported ``parse_ctx.key`` (the previously
            # parsed key) instead of the key that is actually duplicated.
            raise ParseFatalException(
                pstr, loc,
                "duplicate key '%s' value definition" % key)

        parse_ctx.key = key
        parse_ctx.keys.append(key)

        try:
            parse_ctx.key_grammar = parse_ctx.fragment.get_key_grammars()[key]
            key_grammar = parse_ctx.key_grammar.grammar
        except KeyError:
            raise ParseFatalException(
                pstr, loc, "key '%s' is not supported by fragment" % key)
        except Exception as e:
            raise ParseFatalException(
                pstr, loc, "unable to parse key '%s'; %s" % (key, str(e)))

        # Bind the forward-declared key_stmt to this key's own grammar.
        key_stmt << (conditional | Group(key_grammar).setResultsName('value'))
        return None

    def name_parse_action(pstr, loc, toks):
        parse_ctx.fragment.name = toks[0]

    key.setParseAction(key_parse_action)

    ftype = Word(alphas).setParseAction(fragment_type_parse_action)
    fid = Suppress(':') + Word(alphanums + '_.').setResultsName('name')
    fid.setParseAction(name_parse_action)
    header = Suppress('[') + ftype + fid + Suppress(']')

    def fragment_parse_action(pstr, loc, toks):
        # Verify that every required key of this fragment type was present.
        key_grammars = parse_ctx.fragment.get_key_grammars()
        required_keys = set(
            [k for (k, v) in key_grammars.items() if v.required])
        present_keys = required_keys.intersection(set(parse_ctx.keys))
        if present_keys != required_keys:
            raise ParseFatalException(
                pstr, loc,
                'required keys %s for fragment not found' %
                list(required_keys - present_keys))
        return parse_ctx.fragment

    fragment_stmt = Forward()
    fragment_block = indentedBlock(fragment_stmt, indent_stack)

    fragment_if_condition = Group(
        Suppress('if') + condition + Suppress(':') + fragment_block)
    fragment_elif_condition = Group(
        Suppress('elif') + condition + Suppress(':') + fragment_block)
    fragment_else_condition = Group(
        Suppress('else') + Suppress(':') + fragment_block)
    fragment_conditional = (
        fragment_if_condition +
        Optional(OneOrMore(fragment_elif_condition)) +
        Optional(fragment_else_condition)).setResultsName('conditional')

    fragment = (header + OneOrMore(indentedBlock(
        key_body, indent_stack, False))).setResultsName('value')
    fragment.setParseAction(fragment_parse_action)
    fragment.ignore('#' + restOfLine)

    deprecated_mapping = DeprecatedMapping.get_fragment_grammar(
        sdkconfig, fragment_file.name).setResultsName('value')

    fragment_stmt << (Group(deprecated_mapping) | Group(fragment) |
                      Group(fragment_conditional))

    def fragment_stmt_parsed(pstr, loc, toks):
        stmts = list()
        expand_conditionals(toks, stmts)
        return stmts

    parser = ZeroOrMore(fragment_stmt)
    parser.setParseAction(fragment_stmt_parsed)

    self.fragments = parser.parseFile(fragment_file, parseAll=True)

    for fragment in self.fragments:
        fragment.path = path
# PyParsing from pyparsing import alphanums, oneOf, OneOrMore, Optional, White, Word # ################################################################################################################################ # ################################################################################################################################ logger = getLogger(__name__) # ################################################################################################################################ # ################################################################################################################################ # Redis PyParsing grammar quot = Optional(oneOf(('"', "'"))) command = oneOf( ('CONFIG', 'DBSIZE', 'DECR', 'DECRBY', 'DEL', 'DUMP', 'ECHO', 'EXISTS', 'EXPIRE', 'EXPIREAT', 'FLUSHDB', 'GET', 'HDEL', 'HEXISTS', 'HGET', 'HGETALL', 'HINCRBY', 'HKEYS', 'HLEN', 'HSET', 'HSETNX', 'HVALS', 'INCR', 'INCRBY', 'INFO', 'KEYS', 'LLEN', 'LPOP', 'LPUSH', 'LPUSHX', 'LRANGE', 'LREM', 'LSET', 'LTRIM', 'MGET', 'MSET', 'MSETNX', 'OBJECT', 'PERSIST', 'PEXPIRE', 'PEXPIREAT', 'PING', 'PSETEX', 'PTTL', 'RANDOMKEY', 'RENAME', 'RENAMENX', 'RESTORE', 'RPOP', 'SADD', 'SET', 'SISMEMBER', 'SMEMBERS', 'SREM', 'TIME', 'TTL', 'TYPE', 'ZADD', 'ZRANGE', 'ZREM'), caseless=True).setResultsName('command') parameters = (OneOrMore(Word(alphanums + '-' + punctuation))).setResultsName('parameters') redis_grammar = command + Optional(White().suppress() + parameters) # ################################################################################################################################ # ################################################################################################################################
refer_component(components.ParsedFunctionFilter))) if DEBUG: Constraint.setName('Constraint') # Filter: FILTER = Suppress(CaselessKeyword('FILTER')) Filter = (FILTER + Constraint).setName('Filter') # GraphNode is recursively defined in terms of Collection, ObjectList, # PropertyListNotEmpty, and TriplesNode. GraphNode = Forward() if DEBUG: GraphNode.setName('GraphNode') # Collection: Collection = (LP + Group(OneOrMore(GraphNode)) + RP).setParseAction( refer_component(components.ParsedCollection)) if DEBUG: Collection.setName('Collection') # ObjectList: ObjectList = Group(GraphNode + ZeroOrMore(COMMA + GraphNode)) if DEBUG: ObjectList.setName('ObjectList') # PropertyListNotEmpty: PropertyListItem = (Verb + ObjectList).setParseAction( refer_component(components.PropertyValue)) if DEBUG: PropertyListItem.setName('PropertyListItem') PropertyListNotEmpty = Group(PropertyListItem +
# I've made a split of cases to avoid infinite recurrsion # functioncall ::= prefixexp args | prefixexp ‘:’ Name args functioncall_simple = ( (var | (Suppress('(') + exp + Suppress(')'))).setResultsName('fun') + args).setParseAction(function_or_method_call) def fold_chain(t, p, c): i = iter(c[1:]) return reduce(lambda fc, (m, a): function_or_method_call([fc, m, a]), zip(i, i), c[0]) functioncall_chain = (functioncall_simple + OneOrMore(args)).setParseAction(fold_chain) functioncall = functioncall_chain | functioncall_simple # prefixexp ::= var | functioncall | ‘(’ exp ‘)’ prefixexp = (functioncall | var | (Suppress('(') + exp + Suppress(')'))) # exp ::= nil | false | true | Numeral | LiteralString | ‘...’ | functiondef | # prefixexp | tableconstructor | exp binop exp | unop exp exp << (Keyword('nil').setParseAction(from_parse_result(ast.Nil)) | (Keyword('true') | Keyword('false')).setParseAction( from_parse_result(ast.Boolean)) | literal_string | prefixexp | tableconstructor) # explist ::= {exp `,´} exp explist << Group(exp + ZeroOrMore(Suppress(",") + exp))
def _make_arabic_parser():
    """Build a query-string grammar for Arabic/English search syntax and
    return its bound ``parseString`` method.

    Supports plain words, wildcards (* and ؟/?), [x to y] ranges, synonyms
    (~), antonyms (#), derivations (</>) , spelling errors (%), tashkil
    ('...'), {tuples}, quoted phrases, ^boosts, field:value units, and the
    boolean operators AND/OR/ANDNOT/NOT in both English and Arabic.
    """
    escapechar = "//"

    # wordchars = printables
    # for specialchar in '*?^():"{}[] ' + escapechar:
    # wordchars = wordchars.replace(specialchar, "")
    # wordtext = Word(wordchars)

    # NOTE(review): the exact interior whitespace of this triple-quoted
    # string could not be recovered from the collapsed source; it is only
    # used as a character class below, where whitespace chars are inert
    # apart from the space character itself — verify against the original.
    alephba = """
                abcdefghijklmnopqrstuvwxyz_
                األآإـتنمكطدجحخهعغفقثصضشسيبئءؤرىةوزظذ
                """

    # Any run of characters that are not query metacharacters.
    wordtext = CharsNotIn('//*؟^():"{}[]$><%~#،,\' +-|')
    # '//' escapes the next printable or whitespace character.
    escape = Suppress(escapechar) + (Word(printables, exact=1) |
                                     White(exact=1))
    wordtoken = Combine(OneOrMore(wordtext | escape))

    # A plain old word.
    plainWord = Group(wordtoken).setResultsName("Word")

    # A wildcard word containing * or ?.
    wildchars = Word("؟?*")
    # Start with word chars and then have wild chars mixed in
    wildmixed = wordtoken + OneOrMore(wildchars + Optional(wordtoken))
    # Or, start with wildchars, and then either a mixture of word and wild chars
    # , or the next token
    wildstart = wildchars + (OneOrMore(wordtoken + Optional(wildchars)) |
                             FollowedBy(White() | StringEnd()))
    wildcard = Group(Combine(wildmixed | wildstart)).setResultsName("Wildcard")

    # A range of terms: [a to b], [to b], [a to].
    startfence = Literal("[")
    endfence = Literal("]")
    rangeitem = QuotedString('"') | wordtoken
    # 'to' keyword in Arabic and English spellings/cases.
    to = (Keyword("الى") | Keyword("إلى") | Keyword("To") | Keyword("to") |
          Keyword("TO"))
    openstartrange = (Group(Empty()) + Suppress(to + White()) +
                      Group(rangeitem))
    openendrange = (Group(rangeitem) + Suppress(White() + to) +
                    Group(Empty()))
    normalrange = (Group(rangeitem) + Suppress(White() + to + White()) +
                   Group(rangeitem))
    # NOTE: shadows the builtin ``range`` inside this function.
    range = Group(startfence +
                  (normalrange | openstartrange | openendrange) +
                  endfence).setResultsName("Range")

    # synonyms
    syn_symbol = Literal("~")
    synonym = Group(syn_symbol + wordtoken).setResultsName("Synonyms")

    # antonyms
    ant_symbol = Literal("#")
    antonym = Group(ant_symbol + wordtoken).setResultsName("Antonyms")

    # derivation level 1,2
    derive_symbole = Literal("<") | Literal(">")
    derivation = Group(OneOrMore(derive_symbole) +
                       wordtoken).setResultsName("Derivation")

    # spellerrors
    # spellerrors=Group(QuotedString('\'')).setResultsName("Errors")
    spellerrors_symbole = Literal("%")
    spellerrors = Group(spellerrors_symbole +
                        wordtoken).setResultsName("SpellErrors")

    # shakl:must uplevel to boostable
    tashkil_symbol = Literal("'")
    tashkil = Group(tashkil_symbol +
                    ZeroOrMore(wordtoken | White()) +
                    tashkil_symbol).setResultsName("Tashkil")

    # tuple search (root,pattern,type); Arabic or Latin comma separators.
    starttuple = Literal("{")
    endtuple = Literal("}")
    bettuple = Literal("،") | Literal(",")
    wordtuple = Group(Optional(wordtoken))
    # NOTE: shadows the builtin ``tuple`` inside this function.
    tuple = Group(starttuple +
                  wordtuple +
                  ZeroOrMore(bettuple + wordtuple) +
                  endtuple).setResultsName("Tuple")

    # A word-like thing
    generalWord = (range | wildcard | plainWord | tuple | antonym | synonym |
                   derivation | tashkil | spellerrors)

    # A quoted phrase
    quotedPhrase = Group(QuotedString('"')).setResultsName("Quotes")

    expression = Forward()

    # Parentheses can enclose (group) any expression
    parenthetical = Group(
        (Suppress("(") + expression + Suppress(")"))).setResultsName("Group")

    boostableUnit = generalWord | quotedPhrase
    # term^2.5 style score boosting.
    boostedUnit = Group(boostableUnit +
                        Suppress("^") +
                        Word("0123456789", ".0123456789")
                        ).setResultsName("Boost")

    # The user can flag that a parenthetical group, quoted phrase, or word
    # should be searched in a particular field by prepending 'fn:', where fn is
    # the name of the field.
    fieldableUnit = parenthetical | boostedUnit | boostableUnit
    fieldedUnit = Group((Word(alephba + "_") | Word(alphanums + "_")) +
                        Suppress(':') +
                        fieldableUnit).setResultsName("Field")

    # Units of content
    unit = fieldedUnit | fieldableUnit

    # A unit may be "not"-ed.
    operatorNot = Group(Suppress(Keyword("ليس") | Keyword("NOT")) +
                        Suppress(White()) +
                        unit).setResultsName("Not")

    generalUnit = operatorNot | unit

    # Boolean connectives in Arabic and English.
    andToken = Keyword("و") | Keyword("AND")
    orToken = Keyword("أو") | Keyword("او") | Keyword("OR")
    andNotToken = Keyword("وليس") | Keyword("ANDNOT")

    # Each operator also has a symbolic shorthand: + (AND), | (OR), - (ANDNOT).
    operatorAnd = Group((generalUnit +
                         Suppress(White()) +
                         Suppress(andToken) +
                         Suppress(White()) +
                         expression) |
                        (generalUnit +
                         Suppress(Literal("+")) +
                         expression)).setResultsName("And")

    operatorOr = Group((generalUnit +
                        Suppress(White()) +
                        Suppress(orToken) +
                        Suppress(White()) +
                        expression) |
                       (generalUnit +
                        Suppress(Literal("|")) +
                        expression)).setResultsName("Or")

    operatorAndNot = Group((unit +
                            Suppress(White()) +
                            Suppress(andNotToken) +
                            Suppress(White()) +
                            expression) |
                           (unit +
                            Suppress(Literal("-")) +
                            expression)).setResultsName("AndNot")

    expression << (OneOrMore(operatorAnd | operatorOr | operatorAndNot |
                             generalUnit | Suppress(White())) | Empty())

    toplevel = Group(expression).setResultsName("Toplevel") + StringEnd()

    # Return the bound parse entry point, not the grammar object itself.
    return toplevel.parseString
def Sequence(token):
    """Match one or more occurrences of *token*, each optionally comma-terminated."""
    item = token + maybeComma
    return OneOrMore(item)
from undebt.pattern.common import COMMA_IND
from undebt.pattern.common import DOT
from undebt.pattern.common import NAME
from undebt.pattern.common import NL
from undebt.pattern.common import NO_BS_NL
from undebt.pattern.common import NUM
from undebt.pattern.common import PARENS
from undebt.pattern.common import SKIP_TO_TEXT
from undebt.pattern.common import START_OF_FILE
from undebt.pattern.common import STRING
from undebt.pattern.util import addspace
from undebt.pattern.util import condense

# Augmented/plain assignment operator, e.g. '=', '+=', '|=';
# the negative lookahead keeps '==' from matching as an assignment.
ASSIGN_OP = Combine((Word("~%^&*-+|/") | ~Literal("==")) + Literal("="))
# Prefix operators, possibly stacked: '~', '-', '+', 'not'.
UNARY_OP = addspace(OneOrMore(Word("~-+") | Keyword("not")))
# Infix operators: symbolic ops, 'and'/'or', and the 'is [not]'/'[not] in'
# keyword combinations; must not start where an assignment matches.
BINARY_OP = ~ASSIGN_OP + (
    Word("!%^&*-+=|/<>")
    | Keyword("and")
    | Keyword("or")
    | addspace(OneOrMore(Keyword("is") | Keyword("not") | Keyword("in"))))
# Any operator.
OP = ASSIGN_OP | UNARY_OP | BINARY_OP

# Attribute access, call, or subscript suffix after an atom.
# NOTE(review): BRACKETS/BRACES are not imported in this snippet —
# presumably defined elsewhere in this module; verify.
TRAILER = DOT + NAME | PARENS | BRACKETS
TRAILERS = condense(ZeroOrMore(TRAILER))

# The smallest expression unit a trailer can attach to.
ATOM_BASE = NAME | NUM | PARENS | BRACKETS | BRACES | STRING
# A literal value: datetime/date/time (project grammars), a quoted string
# (quotes stripped), or an unsigned integer; wrapped in the project's Token.
Value = MatchFirst([
    DateTime,
    Date,
    Time,
    quotedString.setParseAction(removeQuotes),
    Word(nums).setParseAction(lambda toks: int(toks[0])),
]).setParseAction(Token)


class _List(Token):
    # Token whose value is a sequence of sub-tokens; evaluating it
    # evaluates each element against the same context.
    def __call__(self, ctx):
        return [item(ctx) for item in self.value]


# Two or more comma-separated Values, e.g. for use with the 'in' operator.
List = Group(Value + OneOrMore(Suppress(Literal(",")) +
                               Value)).setParseAction(_List)

# 'in' and regex-match operators; both treat a falsy operand as no match
# rather than raising.
in_op = lambda lhs, rhs: operator.contains(rhs, lhs) if lhs and rhs else False
re_op = lambda lhs, rhs: bool(rhs.search(lhs)) if lhs and rhs else False

# Operators
# Each literal parses to the callable implementing it.  '<=' and '>=' are
# listed before '<' and '>' so the longer match wins.
ComparisonOp = MatchFirst([
    Literal("==").setParseAction(lambda toks: operator.eq),
    Literal("!=").setParseAction(lambda toks: operator.ne),
    Literal("<=").setParseAction(lambda toks: operator.le),
    Literal("<").setParseAction(lambda toks: operator.lt),
    Literal(">=").setParseAction(lambda toks: operator.ge),
    Literal(">").setParseAction(lambda toks: operator.gt),
    Keyword("in").setParseAction(lambda toks: in_op),
    Literal("=~").setParseAction(lambda toks: re_op),
])
# -*- coding: utf-8 -*- from pyparsing import Word, OneOrMore inFilename = 'out1.txt' FIN = open(inFilename, 'r') TEXT = FIN.read() myDigits = '0123456789' eng_alphas = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' rus_alphas = 'йцукенгшщзхъфывапролджэячсмитьбюЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ' punctuation = '.,:;' myPrintables = myDigits + eng_alphas + rus_alphas + punctuation aWord = Word(myPrintables) someText = OneOrMore(aWord) outputText = someText.parseString(TEXT) finalOutput = ' '.join(outputText) print finalOutput
def graph_definition():
    """Build (once) and return the pyparsing grammar for the Graphviz DOT
    language, with parse actions attached that build the graph structure.

    The grammar is cached in the module-global ``graphparser`` and reused
    on subsequent calls.
    """
    global graphparser

    if not graphparser:
        # punctuation
        colon = Literal(":")
        lbrace = Literal("{")
        rbrace = Literal("}")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lparen = Literal("(")
        rparen = Literal(")")
        equals = Literal("=")
        comma = Literal(",")
        dot = Literal(".")
        slash = Literal("/")
        bslash = Literal("\\")
        star = Literal("*")
        semi = Literal(";")
        at = Literal("@")
        minus = Literal("-")

        # keywords
        strict_ = CaselessLiteral("strict")
        graph_ = CaselessLiteral("graph")
        digraph_ = CaselessLiteral("digraph")
        subgraph_ = CaselessLiteral("subgraph")
        node_ = CaselessLiteral("node")
        edge_ = CaselessLiteral("edge")

        # token definitions
        identifier = Word(alphanums + "_.").setName("identifier")

        # Quotes are kept in the result (unquoteResults=False) so the
        # original escaping survives round-tripping.
        double_quoted_string = QuotedString('"', multiline=True,
                                            unquoteResults=False,
                                            escChar="\\")

        noncomma = "".join([c for c in printables if c != ","])
        alphastring_ = OneOrMore(CharsNotIn(noncomma + " "))

        def parse_html(s, loc, toks):
            # Re-wrap the nested-expression tokens back into <...> form.
            return "<%s>" % "".join(toks[0])

        opener = "<"
        closer = ">"
        html_text = (nestedExpr(
            opener, closer,
            (CharsNotIn(opener + closer))
        ).setParseAction(parse_html).leaveWhitespace())

        ID = (identifier | html_text | double_quoted_string |
              alphastring_).setName("ID")

        float_number = Combine(Optional(minus) +
                               OneOrMore(Word(nums + "."))).setName("float_number")

        righthand_id = (float_number | ID).setName("righthand_id")

        # Node ports: ":port[:compass]" or ":(x,y)", optionally with "@angle".
        port_angle = (at + ID).setName("port_angle")

        port_location = (OneOrMore(Group(colon + ID)) |
                         Group(colon + lparen + ID + comma + ID +
                               rparen)).setName("port_location")

        port = (Group(port_location + Optional(port_angle)) |
                Group(port_angle + Optional(port_location))).setName("port")

        node_id = ID + Optional(port)
        a_list = OneOrMore(ID + Optional(equals + righthand_id) +
                           Optional(comma.suppress())).setName("a_list")

        attr_list = OneOrMore(lbrack.suppress() + Optional(a_list) +
                              rbrack.suppress()).setName("attr_list")

        attr_stmt = (Group(graph_ | node_ | edge_) +
                     attr_list).setName("attr_stmt")

        edgeop = (Literal("--") | Literal("->")).setName("edgeop")

        stmt_list = Forward()
        graph_stmt = Group(lbrace.suppress() + Optional(stmt_list) +
                           rbrace.suppress() +
                           Optional(semi.suppress())).setName("graph_stmt")

        # edge_point is recursive: an edge endpoint may itself be a
        # subgraph or anonymous graph block.
        edge_point = Forward()

        edgeRHS = OneOrMore(edgeop + edge_point)
        edge_stmt = edge_point + edgeRHS + Optional(attr_list)

        subgraph = Group(subgraph_ + Optional(ID) +
                         graph_stmt).setName("subgraph")

        edge_point << Group(subgraph | graph_stmt |
                            node_id).setName("edge_point")

        node_stmt = (node_id + Optional(attr_list) +
                     Optional(semi.suppress())).setName("node_stmt")

        assignment = (ID + equals + righthand_id).setName("assignment")

        # Order matters: more specific statements are tried first.
        stmt = (assignment | edge_stmt | attr_stmt | subgraph |
                graph_stmt | node_stmt).setName("stmt")

        stmt_list << OneOrMore(stmt + Optional(semi.suppress()))

        graphparser = OneOrMore((Optional(strict_) +
                                 Group((graph_ | digraph_)) +
                                 Optional(ID) +
                                 graph_stmt).setResultsName("graph"))

        singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine)

        # actions
        # Comments (//, #, /* */) are skipped everywhere.
        graphparser.ignore(singleLineComment)
        graphparser.ignore(cStyleComment)

        # push_* callbacks (defined elsewhere) build the graph model.
        assignment.setParseAction(push_attr_list)
        a_list.setParseAction(push_attr_list)
        edge_stmt.setParseAction(push_edge_stmt)
        node_stmt.setParseAction(push_node_stmt)
        attr_stmt.setParseAction(push_default_stmt)
        subgraph.setParseAction(push_subgraph_stmt)
        graph_stmt.setParseAction(push_graph_stmt)
        graphparser.setParseAction(push_top_graph_stmt)

    return graphparser
# Chemical element symbols, longest-prefix ones (e.g. 'Cl') listed so the
# tokenizer can distinguish them from single letters ('C' + 'l').
elements = ['Ac', 'Ag', 'Al', 'Am', 'Ar', 'As', 'At', 'Au', 'B', 'Ba', 'Be',
            'Bh', 'Bi', 'Bk', 'Br', 'C', 'Ca', 'Cd', 'Ce', 'Cf', 'Cl', 'Cm',
            'Cn', 'Co', 'Cr', 'Cs', 'Cu', 'Db', 'Ds', 'Dy', 'Er', 'Es', 'Eu',
            'F', 'Fe', 'Fl', 'Fm', 'Fr', 'Ga', 'Gd', 'Ge', 'H', 'He', 'Hf',
            'Hg', 'Ho', 'Hs', 'I', 'In', 'Ir', 'K', 'Kr', 'La', 'Li', 'Lr',
            'Lu', 'Lv', 'Md', 'Mg', 'Mn', 'Mo', 'Mt', 'N', 'Na', 'Nb', 'Nd',
            'Ne', 'Ni', 'No', 'Np', 'O', 'Os', 'P', 'Pa', 'Pb', 'Pd', 'Pm',
            'Po', 'Pr', 'Pt', 'Pu', 'Ra', 'Rb', 'Re', 'Rf', 'Rg', 'Rh', 'Rn',
            'Ru', 'S', 'Sb', 'Sc', 'Se', 'Sg', 'Si', 'Sm', 'Sn', 'Sr', 'Ta',
            'Tb', 'Tc', 'Te', 'Th', 'Ti', 'Tl', 'Tm', 'U', 'Uuo', 'Uup',
            'Uus', 'Uut', 'V', 'W', 'Xe', 'Y', 'Yb', 'Zn', 'Zr']
digits = list(map(str, list(range(10))))
symbols = list("[](){}^+-/")
phases = ["(s)", "(l)", "(g)", "(aq)"]
# Fold every literal into one alternation with '^' (pyparsing Or: tries all
# alternatives and takes the longest match).
tokens = reduce(lambda a, b: a ^ b,
                list(map(Literal, elements + digits + symbols + phases)))
# A chemical-equation token stream must consume the entire input.
tokenizer = OneOrMore(tokens) + StringEnd()

# HTML, Text are temporarily copied from openedx.core.djangolib.markup
# These libraries need to be moved out of edx-platform to be used by
# other applications.
# See LEARNER-5853 for more details.
Text = markupsafe.escape                        # pylint: disable=invalid-name


def HTML(html):                                 # pylint: disable=invalid-name
    # Mark a string as safe HTML (no escaping applied on render).
    return markupsafe.Markup(html)


def _orjoin(l):
    # Join strings into a quoted grammar alternation: ["a","b"] -> "'a' | 'b'".
    return "'" + "' | '".join(l) + "'"
class NginxConfigParser(object):
    """
    Nginx config parser based on https://github.com/fatiherikli/nginxparser
    Parses single file into json structure
    """
    # Note: Python 2 code (octal 0777, "except Exception, e" below).

    max_size = 20*1024*1024  # 20 mb

    # line starts/ends
    line_start = LineStart().suppress()
    line_end = LineEnd().suppress()

    # constants
    left_brace = Literal("{").suppress()
    left_parentheses = Literal("(").suppress()
    right_brace = Literal("}").suppress()
    right_parentheses = Literal(")").suppress()
    semicolon = Literal(";").suppress()
    space = White().suppress()
    singleQuote = Literal("'").suppress()
    doubleQuote = Literal('"').suppress()

    # keys
    # Each directive keyword records its source line via set_line_number
    # (defined elsewhere in this module).
    if_key = Keyword("if").setParseAction(set_line_number)
    set_key = Keyword("set").setParseAction(set_line_number)
    rewrite_key = Keyword("rewrite").setParseAction(set_line_number)
    perl_set_key = Keyword("perl_set").setParseAction(set_line_number)
    log_format_key = Keyword("log_format").setParseAction(set_line_number)
    alias_key = Keyword("alias").setParseAction(set_line_number)
    return_key = Keyword("return").setParseAction(set_line_number)
    error_page_key = Keyword("error_page").setParseAction(set_line_number)
    map_key = Keyword("map").setParseAction(set_line_number)
    server_name_key = Keyword("server_name").setParseAction(set_line_number)
    sub_filter_key = Keyword("sub_filter").setParseAction(set_line_number)

    # lua keys
    start_with_lua_key = Regex(r'lua_\S+').setParseAction(set_line_number)
    contains_by_lua_key = Regex(r'\S+_by_lua\S*').setParseAction(set_line_number)

    # A generic directive name: anything that is NOT one of the specially
    # handled keywords above.
    key = (
        ~map_key & ~alias_key & ~perl_set_key &
        ~if_key & ~set_key & ~rewrite_key & ~server_name_key & ~sub_filter_key
    ) + Word(alphanums + '$_:%?"~<>\/-+.,*()[]"' + "'").setParseAction(set_line_number)

    # values
    value_one = Regex(r'[^{};]*"[^\";]+"[^{};]*')
    value_two = Regex(r'[^{};]*\'[^\';]+\'')
    value_three = Regex(r'[^{};]+((\${[\d|\w]+(?=})})|[^{};])+')
    value_four = Regex(r'[^{};]+(?!${.+})')
    value = (value_one | value_two | value_three | value_four).setParseAction(set_line_number)
    quotedValue = Regex(r'"[^;]+"|\'[^;]+\'').setParseAction(set_line_number)
    rewrite_value = CharsNotIn(";").setParseAction(set_line_number)
    any_value = CharsNotIn(";").setParseAction(set_line_number)
    non_space_value = Regex(r'[^\'\";\s]+').setParseAction(set_line_number)
    if_value = Regex(r'\(.*\)').setParseAction(set_line_number)
    language_include_value = CharsNotIn("'").setParseAction(set_line_number)
    strict_value = CharsNotIn("{};").setParseAction(set_line_number)
    sub_filter_value = (non_space_value | Regex(r"\'(.|\n)+?\'", )).setParseAction(set_line_number)

    # map values
    map_value_one = Regex(r'\'([^\']|\s)*\'').setParseAction(set_line_number)
    map_value_two = Regex(r'"([^"]|\s)*\"').setParseAction(set_line_number)
    map_value_three = Regex(r'((\\\s|[^{};\s])*)').setParseAction(set_line_number)
    map_value = (map_value_one | map_value_two | map_value_three)

    # modifier for location uri [ = | ~ | ~* | ^~ ]
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")

    # rules
    # Generic "name value [value];" directive.
    assignment = (
        key + Optional(space) + Optional(value) +
        Optional(space) + Optional(value) + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    # NOTE: shadows the builtin ``set`` in the class namespace.
    set = (
        set_key + Optional(space) + any_value + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    rewrite = (
        rewrite_key + Optional(space) + rewrite_value + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    perl_set = (
        perl_set_key + Optional(space) + key + Optional(space) +
        singleQuote + language_include_value + singleQuote +
        Optional(space) + semicolon
    ).setParseAction(set_line_number)

    # lua_* / *_by_lua* directives carry inline code in single quotes.
    lua_content = (
        (start_with_lua_key | contains_by_lua_key) + Optional(space) +
        singleQuote + language_include_value + singleQuote +
        Optional(space) + semicolon
    ).setParseAction(set_line_number)

    alias = (
        alias_key + space + any_value + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    return_ = (
        (return_key | error_page_key) + space + value + Optional(space) +
        Optional(any_value) + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    log_format = (
        log_format_key + Optional(space) + strict_value + Optional(space) +
        any_value + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    server_name = (
        server_name_key + space + any_value + Optional(space) + semicolon
    ).setParseAction(set_line_number)

    sub_filter = (
        sub_filter_key + space + sub_filter_value + space + sub_filter_value +
        Optional(space) + semicolon
    ).setParseAction(set_line_number)

    # script
    # "map $var $var { value value; ... }" blocks.
    map_block = Forward()
    map_block << Group(
        Group(
            map_key + space +
            map_value + space +
            map_value + Optional(space)
        ).setParseAction(set_line_number) +
        left_brace +
        Group(
            ZeroOrMore(
                Group(map_value + Optional(space) +
                      Optional(map_value) + Optional(space) + semicolon)
            ).setParseAction(set_line_number)
        ) +
        right_brace
    )

    # Generic "name [modifier] [value] { ... }" block; recursive so blocks
    # nest arbitrarily (http > server > location > if ...).
    block = Forward()
    block << Group(
        (
            Group(
                key + Optional(space + modifier) + Optional(space) +
                Optional(value) + Optional(space) +
                Optional(value) + Optional(space)
            ) |
            Group(if_key + space + if_value + Optional(space))
        ).setParseAction(set_line_number) +
        left_brace +
        Group(
            ZeroOrMore(
                Group(log_format) | Group(lua_content) | Group(perl_set) |
                Group(set) | Group(rewrite) | Group(alias) | Group(return_) |
                Group(assignment) | Group(server_name) | Group(sub_filter) |
                map_block | block
            ).setParseAction(set_line_number)
        ).setParseAction(set_line_number) +
        right_brace
    )

    # Top-level grammar: any mix of directives and blocks; '#' comments
    # are skipped everywhere.
    script = OneOrMore(
        Group(log_format) | Group(perl_set) | Group(lua_content) |
        Group(alias) | Group(return_) | Group(assignment) |
        Group(set) | Group(rewrite) | Group(sub_filter) |
        map_block | block
    ).ignore(pythonStyleComment)

    # Regexes for include and ssl_certificate lines (leading [^#]* rejects
    # commented-out directives).
    INCLUDE_RE = re.compile(r'[^#]*include\s+(?P<include_file>.*);')
    SSL_CERTIFICATE_RE = re.compile(r'[^#]*ssl_certificate\s+(?P<cert_file>.*);')

    def __init__(self, filename='/etc/nginx/nginx.conf'):
        # NOTE(review): module-global cache reset on every instantiation —
        # presumably intentional so each parser run starts fresh; verify.
        global tokens_cache
        tokens_cache = {}

        self.filename = filename
        self.folder = '/'.join(self.filename.split('/')[:-1])  # stores path to folder with main config
        self.files = {}  # to prevent cycle files and line indexing
        self.parsed_cache = {}  # to cache multiple includes
        self.broken_files = set()  # to prevent reloading broken files
        self.index = []  # stores index for all sections (points to file number and line number)
        self.ssl_certificates = []
        self.errors = []
        self.tree = {}

    def parse(self):
        # __pyparse/__logic_parse are defined elsewhere in this class.
        self.tree = self.__logic_parse(self.__pyparse(self.filename))

        # drop cached
        self.parsed_cache = None

    @staticmethod
    def get_file_info(filename):
        """
        Returns file size, mtime and permissions

        :param filename: str filename
        :return: int, int, str - size, mtime, permissions
        """
        size, mtime, permissions = 0, 0, '0000'
        try:
            size = os.path.getsize(filename)
            mtime = int(os.path.getmtime(filename))
            permissions = oct(os.stat(filename).st_mode & 0777)
        except Exception, e:
            # Best-effort: on any stat failure log and return the defaults.
            exception_name = e.__class__.__name__
            message = 'failed to stat %s due to: %s' % (filename, exception_name)
            context.log.debug(message, exc_info=True)
        return size, mtime, permissions
class SQLParser():
    """General SQL Parser

    Based on and inspired by ddlparser
    Modified to:
    - support SQLite edge cases:
        - AUTO_INCREMENT vs AUTOINCREMENT
    - support other statement types: (WIP)
        - CREATE INDEX
        - CREATE SCHEMA
        - CREATE SEQUENCE
        - CREATE DATABASE
    - replaces parse results with ZQL
    - adds parse_expression to support parsing expressions into ZQL (WIP)
    """
    # Matches a simple one-level function call: name(args)
    FUNCTION_REGEX = re.compile(r'^([a-zA-Z][0-9a-zA-Z._]*)\(([^)]*)\)$')

    # --- grammar terminals -------------------------------------------------
    INITIALLY_DEFERRED, INITIALLY_IMMEDIATE, DEFERRABLE, NOT_DEFERRABLE = map(
        CaselessKeyword,
        "INITIALLY DEFERRED, INITIALLY IMMEDIATE, DEFERRABLE, NOT DEFERRABLE"
        .replace(", ", ",").split(","))
    LPAR, RPAR, COMMA, SEMICOLON, DOT, DOUBLEQUOTE, BACKQUOTE, SPACE = map(
        Suppress, "(),;.\"` ")
    CREATE, TABLE, TEMP, CONSTRAINT, NOT_NULL, PRIMARY_KEY, UNIQUE, \
        UNIQUE_KEY, FOREIGN_KEY, REFERENCES, KEY, CHAR_SEMANTICS, \
        BYTE_SEMANTICS = map(
            CaselessKeyword,
            "CREATE, TABLE, TEMP, CONSTRAINT, NOT NULL, PRIMARY KEY, UNIQUE, "
            "UNIQUE KEY, FOREIGN KEY, REFERENCES, KEY, CHAR, BYTE"
            .replace(", ", ",").split(","))
    TYPE_UNSIGNED, TYPE_ZEROFILL = map(
        CaselessKeyword, "UNSIGNED, ZEROFILL".replace(", ", ",").split(","))
    COL_ATTR_DISTKEY, COL_ATTR_SORTKEY, COL_ATTR_CHARACTER_SET = map(
        CaselessKeyword,
        "DISTKEY, SORTKEY, CHARACTER SET".replace(", ", ",").split(","))
    FK_MATCH = CaselessKeyword("MATCH") + Word(alphanums + "_")
    FK_ON, FK_ON_OPT_RESTRICT, FK_ON_OPT_CASCADE, FK_ON_OPT_SET_NULL, \
        FK_ON_OPT_NO_ACTION = map(
            CaselessKeyword,
            "ON, RESTRICT, CASCADE, SET NULL, NO ACTION"
            .replace(", ", ",").split(","))
    FK_ON_DELETE = \
        FK_ON + CaselessKeyword("DELETE") + (
            FK_ON_OPT_RESTRICT | FK_ON_OPT_CASCADE |
            FK_ON_OPT_SET_NULL | FK_ON_OPT_NO_ACTION)
    FK_ON_UPDATE = \
        FK_ON + CaselessKeyword("UPDATE") + (
            FK_ON_OPT_RESTRICT | FK_ON_OPT_CASCADE |
            FK_ON_OPT_SET_NULL | FK_ON_OPT_NO_ACTION)
    SUPPRESS_QUOTE = BACKQUOTE | DOUBLEQUOTE
    COMMENT = Suppress("--" + Regex(r".+"))

    # --- CREATE TABLE grammar ---------------------------------------------
    # Results names used downstream: temporary, maybe, schema, table, items;
    # each item is either a "constraint" group or a "column" group.
    CREATE_TABLE_STATEMENT = \
        Suppress(CREATE) + Optional(TEMP)("temporary") + Suppress(TABLE) + \
        Optional(Optional(CaselessKeyword("IF NOT EXISTS")("maybe"))) \
        + Optional(SUPPRESS_QUOTE) \
        + Optional(Word(alphanums + "_")("schema") + Optional(SUPPRESS_QUOTE)
                   + DOT + Optional(SUPPRESS_QUOTE)) \
        + Word(alphanums + "_<>")("table") + Optional(SUPPRESS_QUOTE) \
        + LPAR \
        + delimitedList(
            OneOrMore(
                COMMENT |
                # Ignore Index
                Suppress(KEY + Word(alphanums + "_'`() ")) |
                Group(
                    Optional(Suppress(CONSTRAINT) + Optional(SUPPRESS_QUOTE)
                             + Word(alphanums + "_")("name")
                             + Optional(SUPPRESS_QUOTE)) +
                    (
                        (
                            (PRIMARY_KEY ^ UNIQUE ^ UNIQUE_KEY ^ NOT_NULL)("type")
                            + Optional(SUPPRESS_QUOTE)
                            + Optional(Word(alphanums + "_"))("name")
                            + Optional(SUPPRESS_QUOTE)
                            + LPAR
                            + Group(delimitedList(
                                Optional(SUPPRESS_QUOTE) + Word(alphanums + "_")
                                + Optional(SUPPRESS_QUOTE)))("constraint_columns")
                            + RPAR
                        ) |
                        (
                            (FOREIGN_KEY)("type")
                            + LPAR
                            + Group(delimitedList(
                                Optional(SUPPRESS_QUOTE) + Word(alphanums + "_")
                                + Optional(SUPPRESS_QUOTE)))("constraint_columns")
                            + RPAR
                            + Optional(
                                Suppress(REFERENCES)
                                + Optional(SUPPRESS_QUOTE)
                                + Word(alphanums + "_")("references_table")
                                + Optional(SUPPRESS_QUOTE)
                                + LPAR
                                + Group(delimitedList(
                                    Optional(SUPPRESS_QUOTE)
                                    + Word(alphanums + "_")
                                    + Optional(SUPPRESS_QUOTE)))("references_columns")
                                + RPAR
                                # + Optional(FK_MATCH)("references_fk_match")  # MySQL
                                # + Optional(FK_ON_DELETE)("references_fk_on_delete")  # MySQL
                                # + Optional(FK_ON_UPDATE)("references_fk_on_update")  # MySQL
                            )
                        )
                    )
                    & Optional(NOT_DEFERRABLE ^ DEFERRABLE)("deferrable")  # Postgres, Oracle
                    & Optional(INITIALLY_DEFERRED ^ INITIALLY_IMMEDIATE)("deferred")  # Postgres, Oracle
                )("constraint") |
                Group(
                    ((SUPPRESS_QUOTE + Word(alphanums + " _")("name")
                      + SUPPRESS_QUOTE) ^
                     (Optional(SUPPRESS_QUOTE) + Word(alphanums + "_")("name")
                      + Optional(SUPPRESS_QUOTE)))
                    + Optional(Group(
                        Group(
                            Word(alphanums + "_")
                            + Optional(CaselessKeyword("WITHOUT TIME ZONE")
                                       ^ CaselessKeyword("WITH TIME ZONE")
                                       ^ CaselessKeyword("PRECISION")
                                       ^ CaselessKeyword("VARYING"))
                        )("type_name")
                        + Optional(LPAR + Regex(r"[\d\*]+\s*,*\s*\d*")("length")
                                   + Optional(CHAR_SEMANTICS | BYTE_SEMANTICS)("semantics")
                                   + RPAR)
                        + Optional(TYPE_UNSIGNED)("unsigned")
                        + Optional(TYPE_ZEROFILL)("zerofill")
                    )("type"))
                    + Optional(Word(r"\[\]"))("array_brackets")
                    + Optional(
                        Regex(r"(?!--)", re.IGNORECASE)
                        + Group(
                            Optional(Regex(r"\b(?:NOT\s+)NULL?\b",
                                           re.IGNORECASE))("null")
                            & Optional(Regex(r"\bAUTO(?:_)?INCREMENT\b",
                                             re.IGNORECASE))("auto_increment")
                            & (
                                Optional(Regex(r"\b(UNIQUE|PRIMARY)(?:\s+KEY)?\b",
                                               re.IGNORECASE))("key")
                                | (
                                    (FOREIGN_KEY)("type")
                                    + LPAR
                                    + Group(delimitedList(
                                        Optional(SUPPRESS_QUOTE)
                                        + Word(alphanums + "_")
                                        + Optional(SUPPRESS_QUOTE)))("constraint_columns")
                                    + RPAR
                                    + Optional(
                                        Suppress(REFERENCES)
                                        + Optional(SUPPRESS_QUOTE)
                                        + Word(alphanums + "_")("references_table")
                                        + Optional(SUPPRESS_QUOTE)
                                        + LPAR
                                        + Group(
                                            delimitedList(
                                                Optional(SUPPRESS_QUOTE)
                                                + Word(alphanums + "_")
                                                + Optional(SUPPRESS_QUOTE)
                                            )
                                        )("references_columns")
                                        + RPAR
                                    )
                                )
                            )
                            & Optional(Regex(
                                r"\bDEFAULT\b\s+(?:((?:[A-Za-z0-9_\.\'\" -\{\}]|[^\x01-\x7E])*\:\:(?:character varying)?[A-Za-z0-9\[\]]+)|(?:\')((?:\\\'|[^\']|,)+)(?:\')|(?:\")((?:\\\"|[^\"]|,)+)(?:\")|([^,\s]+))",
                                re.IGNORECASE))("default")
                            & Optional(Regex(
                                r"\bCOMMENT\b\s+(\'(\\\'|[^\']|,)+\'|\"(\\\"|[^\"]|,)+\"|[^,\s]+)",
                                re.IGNORECASE))("comment")
                            & Optional(Regex(r"\bENCODE\s+[A-Za-z0-9]+\b",
                                             re.IGNORECASE))("encode")  # Redshift
                            & Optional(COL_ATTR_DISTKEY)("distkey")  # Redshift
                            & Optional(COL_ATTR_SORTKEY)("sortkey")  # Redshift
                            & Optional(Suppress(COL_ATTR_CHARACTER_SET)
                                       + Word(alphanums + "_")("character_set"))  # MySQL
                        )("constraint")
                    )
                )("column") |
                COMMENT
            )
        )("items")

    PARSE = Forward()
    PARSE << OneOrMore(COMMENT | CREATE_TABLE_STATEMENT)

    def __init__(self, sql=None):
        self.sql = sql

    def remove_cast(self, literal: str):
        """Strip a Postgres-style '::type' cast suffix from a literal."""
        if '::' in literal:
            literal = literal.split('::')[0]
        return literal

    def parse_literal(self, literal: str):
        """Normalize a literal value (currently only removes casts)."""
        return self.remove_cast(literal)

    def parse_expression(self, expression: str):
        """Parse a SQL expression into ZQL.

        Returns a {function_name: argument} dict for simple one-argument
        function calls, the normalized literal otherwise, and the input
        unchanged when it is not a string.  Returns None for call-like
        expressions that do not match FUNCTION_REGEX.
        """
        # TODO: replace with real SQL parsing
        # this is super hacky
        if expression is None or not isinstance(expression, str):
            return expression
        if '(' in expression:
            match = self.FUNCTION_REGEX.match(expression)
            if match:
                fn = match.group(1)
                arguments = match.group(2)
                # assume 1 variable function call only
                # TODO: support multi-variable calls
                arguments = self.parse_literal(arguments)
                return {fn: arguments}
        else:
            result = self.parse_literal(expression)
            return result

    def get_column_type(self, type, brackets=None):
        """Render a parsed column type back into a type string.

        :param type: parsed "type" group (or falsy for typeless SQLite columns)
        :param brackets: optional Postgres array-bracket suffix, e.g. "[]"
        :return: lowercase type string such as "character varying(10)" or None
        :raises ValueError: if the group is present but has no type name
        """
        if not type:
            # typeless, e.g. SQLite
            return None
        if 'type_name' not in type:
            raise ValueError(f'{type}: missing type name')

        type_name = ' '.join(type['type_name']).lower()
        length = type.get('length')
        semantics = type.get('semantics')
        unsigned = type.get('unsigned')
        zerofill = type.get('zerofill')
        optionals = []
        if length:
            if semantics:
                optionals.append(f'({length} {semantics})')
            else:
                optionals.append(f'({length})')
        if unsigned:
            optionals.append(unsigned)
        if zerofill:
            optionals.append(zerofill)
        if optionals:
            type_name += ' '.join(optionals)
        if brackets:
            type_name += brackets
        # e.g. character varying (10)
        return type_name

    def get_column_definition(self, column):
        """Translate a parsed "column" group into a ZQL column definition."""
        result = {}
        result['name'] = column['name']
        result['type'] = self.get_column_type(column.get('type'),
                                              column.get('array_brackets'))
        result['default'] = column.get('default')
        constraint = column.get('constraint', {})
        result['null'] = 'NOT NULL' not in constraint.get('null', '').upper()
        key = constraint.get('key', '')
        result['primary'] = key.upper() == 'PRIMARY KEY'
        result['unique'] = key.upper() in {'UNIQUE', 'UNIQUE KEY'}
        # BUG FIX: `related` used to be reset to None unconditionally AFTER
        # the foreign-key branch, clobbering the reference info; initialize
        # it first instead.  Also, the grammar stores an inline FOREIGN KEY
        # under the "type" results name (the "key" regex only matches
        # UNIQUE/PRIMARY [KEY]), so test `type`, not `key`.
        result['related'] = None
        if constraint.get('type', '').upper() == 'FOREIGN KEY':
            result['related'] = {
                'to': constraint['references_table'],
                'by': self.get_columns(constraint['references_columns'])
            }
        result['sequence'] = 'auto_increment' in constraint
        return result

    def get_constraint_type(self, type):
        """Normalize a constraint keyword, e.g. 'FOREIGN KEY' -> 'foreign'."""
        return type.lower().replace('key', '').strip()

    def get_columns(self, columns):
        """Convert a ParseResults column list to a plain list (or None)."""
        if columns is None:
            return None
        return columns.asList()

    def get_constraint_definition(self, constraint):
        """Translate a parsed "constraint" group into a ZQL constraint."""
        result = {}
        result['name'] = constraint['name']
        result['type'] = self.get_constraint_type(constraint.get('type'))
        # (removed a dead assignment: `deferrable` was first set from
        # constraint.get('deferrable', False) and immediately overwritten)
        result['deferred'] = \
            constraint.get('deferred', '').upper() == 'INITIALLY DEFERRED'
        result['deferrable'] = \
            constraint.get('deferrable', '').upper() == 'DEFERRABLE'
        result['check'] = constraint.get('check', None)
        result['columns'] = self.get_columns(
            constraint.get('constraint_columns'))
        result['related_name'] = constraint.get('references_table', None)
        result['related_columns'] = self.get_columns(
            constraint.get('references_columns'))
        return result

    def parse_statement(self, sql=None):
        """
        Parse SQL into ZQL

        Arguments:
            sql: SQL statement, supports:
                - CREATE TABLE
                - CREATE INDEX (WIP)
        Return:
            ZQL object representing the CREATE TABLE statement
        Raises:
            ValueError: if no SQL is given or it cannot be parsed
        """
        sql = sql or self.sql
        if not sql:
            raise ValueError('`sql` is not specified')

        parsed = self.PARSE.parseString(sql)
        result = {}
        if 'table' not in parsed:
            raise ValueError(f'failed to parse SQL: "{sql}"')

        table = parsed['table']
        schema = parsed.get('schema')
        result['name'] = f'{schema}.{table}' if schema else table
        result['temporary'] = "temporary" in parsed
        result['maybe'] = 'maybe' in parsed
        columns = []
        constraints = []
        for item in parsed["items"]:
            if item.getName() == "column":
                # add column
                # may have attached constraint
                columns.append(self.get_column_definition(item))
            elif item.getName() == "constraint":
                # add constraint
                constraints.append(self.get_constraint_definition(item))

        # use adbc.store.Table to update the constraints/columns
        name = table
        table = Table(name, backend=self, columns=columns,
                      constraints=constraints)
        result['columns'] = named_dict_to_list(table.columns)
        result['constraints'] = named_dict_to_list(table.constraints)
        return {'create': {'table': result}}
def CORBA_IDL_BNF():
    """Build (once) and return the pyparsing grammar for a subset of CORBA IDL.

    The grammar is cached in the module-level global `bnf` (declared outside
    this chunk), so repeated calls return the same parser object.
    """
    global bnf
    if not bnf:
        # punctuation
        colon = Literal(":")
        lbrace = Literal("{")
        rbrace = Literal("}")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lparen = Literal("(")
        rparen = Literal(")")
        equals = Literal("=")
        comma = Literal(",")
        dot = Literal(".")
        slash = Literal("/")
        bslash = Literal("\\")
        star = Literal("*")
        semi = Literal(";")
        langle = Literal("<")
        rangle = Literal(">")

        # keywords
        any_ = Keyword("any")
        attribute_ = Keyword("attribute")
        boolean_ = Keyword("boolean")
        case_ = Keyword("case")
        char_ = Keyword("char")
        const_ = Keyword("const")
        context_ = Keyword("context")
        default_ = Keyword("default")
        double_ = Keyword("double")
        enum_ = Keyword("enum")
        exception_ = Keyword("exception")
        false_ = Keyword("FALSE")
        fixed_ = Keyword("fixed")
        float_ = Keyword("float")
        inout_ = Keyword("inout")
        interface_ = Keyword("interface")
        in_ = Keyword("in")
        long_ = Keyword("long")
        module_ = Keyword("module")
        object_ = Keyword("Object")
        octet_ = Keyword("octet")
        oneway_ = Keyword("oneway")
        out_ = Keyword("out")
        raises_ = Keyword("raises")
        readonly_ = Keyword("readonly")
        sequence_ = Keyword("sequence")
        short_ = Keyword("short")
        string_ = Keyword("string")
        struct_ = Keyword("struct")
        switch_ = Keyword("switch")
        true_ = Keyword("TRUE")
        typedef_ = Keyword("typedef")
        unsigned_ = Keyword("unsigned")
        union_ = Keyword("union")
        void_ = Keyword("void")
        wchar_ = Keyword("wchar")
        wstring_ = Keyword("wstring")

        identifier = Word(alphas, alphanums + "_").setName("identifier")

        #~ real = Combine( Word(nums+"+-", nums) + dot + Optional( Word(nums) )
        #~ + Optional( CaselessLiteral("E") + Word(nums+"+-",nums) ) )
        real = Regex(r"[+-]?\d+\.\d*([Ee][+-]?\d+)?").setName("real")
        #~ integer = ( Combine( CaselessLiteral("0x") + Word( nums+"abcdefABCDEF" ) ) |
        #~ Word( nums+"+-", nums ) ).setName("int")
        integer = Regex(r"0x[0-9a-fA-F]+|[+-]?\d+").setName("int")

        # user-defined, possibly scoped type name, e.g. "Mod::MyType"
        udTypeName = delimitedList(identifier, "::", combine=True).setName("udType")
        # have to use longest match for type, in case a user-defined type name
        # starts with a keyword type, like "stringSeq" or "longArray"
        typeName = (any_ ^ boolean_ ^ char_ ^ double_ ^ fixed_ ^ float_ ^
                    long_ ^ octet_ ^ short_ ^ string_ ^ wchar_ ^ wstring_ ^
                    udTypeName).setName("type")
        sequenceDef = Forward().setName("seq")
        sequenceDef << Group(sequence_ + langle + (sequenceDef | typeName) + rangle)
        typeDef = sequenceDef | (typeName + Optional(lbrack + integer + rbrack))
        typedefDef = Group(typedef_ + typeDef + identifier + semi).setName("typedef")

        moduleDef = Forward()
        constDef = Group(const_ + typeDef + identifier + equals +
                         (real | integer | quotedString) + semi)  #| quotedString )
        exceptionItem = Group(typeDef + identifier + semi)
        exceptionDef = (exception_ + identifier + lbrace +
                        ZeroOrMore(exceptionItem) + rbrace + semi)
        attributeDef = Optional(
            readonly_) + attribute_ + typeDef + identifier + semi
        paramlist = delimitedList(
            Group((inout_ | in_ | out_) + typeName + identifier)).setName("paramlist")
        operationDef = ( ( void_ ^ typeDef ) + identifier + lparen +
                         Optional( paramlist ) + rparen + \
                         Optional( raises_ + lparen +
                                   Group( delimitedList( typeName ) ) +
                                   rparen ) + semi )
        interfaceItem = (constDef | exceptionDef | attributeDef | operationDef)
        interfaceDef = Group( interface_ + identifier +
                              Optional( colon + delimitedList( typeName ) ) +
                              lbrace + \
                              ZeroOrMore( interfaceItem ) + rbrace +
                              semi ).setName("opnDef")
        moduleItem = (interfaceDef | exceptionDef | constDef | typedefDef |
                      moduleDef)
        moduleDef << module_ + identifier + lbrace + ZeroOrMore(
            moduleItem) + rbrace + semi

        # a file is either one module or a sequence of top-level items
        bnf = (moduleDef | OneOrMore(moduleItem))

        # IDL supports both C++-style and C-style comments
        singleLineComment = "//" + restOfLine
        bnf.ignore(singleLineComment)
        bnf.ignore(cStyleComment)

    return bnf
from pyparsing import pyparsing_common as ppc # OnlyOnce, ParseResults, FollowedBy, Forward, NotAny, OneOrMore, ZeroOrMore, Optional, SkipTo, # Combine, Dict, Group, Suppress, # And, Each, MatchFirst, Or, CharsNotIn, Empty, Keyword, CaselessKeyword, Literal, CaselessLiteral, # NoMatch, QuotedString, Regex, White, Word from os.path import join, isfile, exists, isdir from os import listdir INPUT = "testFile" #r/w/rb/wb with open(INPUT, 'r') as f: inputText = f.read() logging.info("Read text : {}".format(inputText)) OPAR = Literal('(') CPAR = Literal(')') PARENS = OPAR | CPAR EQUATION = OPAR + Literal('+') + ppc.integer + ppc.integer + CPAR EQUATION.setParseAction(lambda toks: int(toks[2]) + int(toks[3])) LIST = OPAR + Literal('this') + ZeroOrMore(Word(alphas) | EQUATION) + CPAR LIST.setParseAction(lambda toks: [[x for x in toks if not PARENS.matches(x)]]) parser = OneOrMore(LIST) logging.info("Parsed Text: {}".format(parser.parseString(inputText)))
def parse(input):
    # parse a string into an element of the abstract representation

    # Grammar:
    #
    # <expr> ::= <integer>
    #            true
    #            false
    #            <identifier>
    #            ( if <expr> <expr> <expr> )
    #            ( let ( ( <name> <expr> ) ) <expr )
    #            ( function ( <name> ... ) <expr> )
    #            ( ref <expr> )
    #            ( <expr> <expr> ... )
    #

    idChars = alphas + "_+*-?!=<>"

    pIDENTIFIER = Word(idChars, idChars + "0123456789")
    pIDENTIFIER.setParseAction(lambda result: EId(result[0]))

    # A name is like an identifier but it does not return an EId...
    pNAME = Word(idChars, idChars + "0123456789")

    pNAMES = ZeroOrMore(pNAME)
    pNAMES.setParseAction(lambda result: [result])

    pINTEGER = Word("0123456789")
    pINTEGER.setParseAction(lambda result: EValue(VInteger(int(result[0]))))

    pBOOLEAN = Keyword("true") | Keyword("false")
    pBOOLEAN.setParseAction(
        lambda result: EValue(VBoolean(result[0] == "true")))

    pEXPR = Forward()

    pEXPRS = ZeroOrMore(pEXPR)
    pEXPRS.setParseAction(lambda result: [result])

    # In the actions below, index 0 is the literal "(" and index 1 the
    # keyword, so payload sub-expressions start at index 2.
    pIF = "(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")"
    pIF.setParseAction(lambda result: EIf(result[2], result[3], result[4]))

    pBINDING = "(" + pNAME + pEXPR + ")"
    pBINDING.setParseAction(lambda result: (result[1], result[2]))

    pBINDINGS = OneOrMore(pBINDING)
    pBINDINGS.setParseAction(lambda result: [result])

    pLET = "(" + Keyword("let") + "(" + pBINDINGS + ")" + pEXPR + ")"
    pLET.setParseAction(lambda result: ELet(result[3], result[5]))

    pCALL = "(" + pEXPR + pEXPRS + ")"
    pCALL.setParseAction(lambda result: ECall(result[1], result[2]))

    pFUN = "(" + Keyword("function") + "(" + pNAMES + ")" + pEXPR + ")"
    pFUN.setParseAction(lambda result: EFunction(result[3], result[5]))

    pREF = "(" + Keyword("ref") + pEXPR + ")"
    pREF.setParseAction(lambda result: ERefCell(result[2]))

    pDO = "(" + Keyword("do") + pEXPRS + ")"
    pDO.setParseAction(lambda result: EDo(result[2]))

    pWHILE = "(" + Keyword("while") + pEXPR + pEXPR + ")"
    pWHILE.setParseAction(lambda result: EWhile(result[2], result[3]))

    # NOTE: pCALL must come last — it matches any parenthesized form, so the
    # keyword forms have to get first chance.
    pEXPR << (pINTEGER | pBOOLEAN | pIDENTIFIER | pIF | pLET | pFUN | pREF |
              pDO | pWHILE | pCALL)

    # can't attach a parse action to pEXPR because of recursion, so let's
    # duplicate the parser
    pTOPEXPR = pEXPR.copy()
    pTOPEXPR.setParseAction(lambda result: {
        "result": "expression",
        "expr": result[0]
    })

    pDEFINE = "(" + Keyword("define") + pNAME + pEXPR + ")"
    pDEFINE.setParseAction(lambda result: {
        "result": "value",
        "name": result[2],
        "expr": result[3]
    })

    pDEFUN = "(" + Keyword("defun") + pNAME + "(" + pNAMES + ")" + pEXPR + ")"
    pDEFUN.setParseAction(
        lambda result: {
            "result": "function",
            "name": result[2],
            "params": result[4],
            "body": result[6]
        })

    pABSTRACT = "#abs" + pEXPR
    pABSTRACT.setParseAction(lambda result: {
        "result": "abstract",
        "expr": result[1]
    })

    pQUIT = Keyword("#quit")
    pQUIT.setParseAction(lambda result: {"result": "quit"})

    pTOP = (pDEFUN | pDEFINE | pQUIT | pABSTRACT | pTOPEXPR)

    result = pTOP.parseString(input)[0]
    return result  # the first element of the result is the expression
# Search-query grammar: terms are optionally prefixed with "meta:", may be
# quoted or use '*' wildcards, and combine with and/or/'-' (not); a missing
# operator between two terms defaults to 'and'.
NO_BRTS = printables.replace('(', '').replace(')', '')
SINGLE = Word(NO_BRTS.replace('*', ''))
WILDCARDS = Optional('*') + SINGLE + Optional('*') + WordEnd(wordChars=NO_BRTS)
QUOTED = quotedString.setParseAction(removeQuotes)

OPER_AND = CaselessLiteral('and')
OPER_OR = CaselessLiteral('or')
OPER_NOT = '-'

# create_q (defined elsewhere) turns a matched term into a query object.
TERM = Combine(Optional(Word(alphas).setResultsName('meta') + ':') +
               (QUOTED.setResultsName('query') |
                WILDCARDS.setResultsName('query')))
TERM.setParseAction(create_q)

# NOTE(review): operatorPrecedence is the pre-2.3 pyparsing name for
# infixNotation — presumably kept for compatibility with the pinned
# pyparsing version; confirm before upgrading.
EXPRESSION = operatorPrecedence(TERM, [
    (OPER_NOT, 1, opAssoc.RIGHT),
    (OPER_OR, 2, opAssoc.LEFT),
    (Optional(OPER_AND, default='and'), 2, opAssoc.LEFT)])
EXPRESSION.setParseAction(union_q)

QUERY = OneOrMore(EXPRESSION) + StringEnd()
QUERY.setParseAction(union_q)


def advanced_search(pattern):
    """
    Parse the grammar of a pattern
    and build a queryset with it.
    """
    query_parsed = QUERY.parseString(pattern)
    # query_parsed[0] is the combined Q object produced by union_q
    return Entry.published.filter(query_parsed[0]).distinct()
def _create_grammar():
    """Create the DBC grammar.

    Returns a pyparsing expression matching a whole DBC file: one or more
    top-level entries followed by end-of-string.  The section keyword
    constants (SIGNAL, MESSAGE, COMMENT, ...) are defined at module level.
    The '-' operator after each keyword makes errors fatal at that point,
    giving precise error locations instead of backtracking.
    """
    word = Word(printables.replace(';', '').replace(':', ''))
    integer = Group(Optional('-') + Word(nums))
    positive_integer = Word(nums).setName('positive integer')
    number = Word(nums + '.Ee-+')
    colon = Suppress(Literal(':'))
    scolon = Suppress(Literal(';'))
    pipe = Suppress(Literal('|'))
    at = Suppress(Literal('@'))
    sign = Literal('+') | Literal('-')
    lp = Suppress(Literal('('))
    rp = Suppress(Literal(')'))
    lb = Suppress(Literal('['))
    rb = Suppress(Literal(']'))
    comma = Suppress(Literal(','))
    # nodes are space-separated on a single line, so newlines must not be
    # skipped as whitespace here
    node = Word(alphas + nums + '_-').setWhitespaceChars(' ')
    frame_id = Word(nums).setName('frame id')

    version = Group(Keyword('VERSION') - QuotedString())
    version.setName(VERSION)

    symbol = Word(alphas + '_') + Suppress(LineEnd())

    symbols = Group(Keyword('NS_') - colon - Group(ZeroOrMore(symbol)))
    symbols.setName('NS_')

    discard = Suppress(Keyword('BS_') - colon).setName('BS_')

    nodes = Group(Keyword('BU_') - colon - Group(ZeroOrMore(node)))
    nodes.setName('BU_')

    # SG_ <name> [mux] : <start>|<len>@<order><sign> (<scale>,<offset>)
    #     [<min>|<max>] "<unit>" <receivers>
    signal = Group(
        Keyword(SIGNAL) -
        Group(word + Optional(word)) -
        colon -
        Group(positive_integer -
              pipe -
              positive_integer -
              at -
              positive_integer -
              sign) -
        Group(lp - number - comma - number - rp) -
        Group(lb - number - pipe - number - rb) -
        QuotedString() -
        Group(delimitedList(node)))
    signal.setName(SIGNAL)

    message = Group(
        Keyword(MESSAGE) -
        frame_id -
        word -
        colon -
        positive_integer -
        word -
        Group(ZeroOrMore(signal)))
    message.setName(MESSAGE)

    # environment variables are recognized but discarded
    event = Suppress(
        Keyword(EVENT) -
        word -
        colon -
        positive_integer -
        lb -
        number -
        pipe -
        number -
        rb -
        QuotedString() -
        number -
        number -
        word -
        node -
        scolon)
    event.setName(EVENT)

    # CM_ — a comment may target a signal, message, event, node, or the file
    comment = Group(
        Keyword(COMMENT) -
        ((Keyword(SIGNAL) -
          frame_id -
          word -
          QuotedString() -
          scolon).setName(SIGNAL) |
         (Keyword(MESSAGE) -
          frame_id -
          QuotedString() -
          scolon).setName(MESSAGE) |
         (Keyword(EVENT) -
          word -
          QuotedString() -
          scolon).setName(EVENT) |
         (Keyword(NODES) -
          word -
          QuotedString() -
          scolon).setName(NODES) |
         (QuotedString() -
          scolon).setName('QuotedString')))
    comment.setName(COMMENT)

    attribute_definition = Group(
        Keyword(ATTRIBUTE_DEFINITION) -
        ((QuotedString()) |
         (Keyword(SIGNAL) |
          Keyword(MESSAGE) |
          Keyword(EVENT) |
          Keyword(NODES)) + QuotedString()) -
        word -
        # value spec: nothing, an enum of quoted strings, or a numeric range
        (scolon |
         (Group(ZeroOrMore(Group(
             (comma | Empty()) + QuotedString()))) + scolon) |
         (Group(ZeroOrMore(number)) + scolon)))
    attribute_definition.setName(ATTRIBUTE_DEFINITION)

    attribute_definition_default = Group(
        Keyword(ATTRIBUTE_DEFINITION_DEFAULT) -
        QuotedString() -
        (number | QuotedString()) -
        scolon)
    attribute_definition_default.setName(ATTRIBUTE_DEFINITION_DEFAULT)

    attribute = Group(
        Keyword(ATTRIBUTE) -
        QuotedString() -
        Group(Optional((Keyword(MESSAGE) + frame_id) |
                       (Keyword(SIGNAL) + frame_id + word) |
                       (Keyword(NODES) + word))) -
        (QuotedString() | number) -
        scolon)
    attribute.setName(ATTRIBUTE)

    # VAL_ — enumerated value descriptions for a signal
    choice = Group(
        Keyword(CHOICE) -
        Group(Optional(frame_id)) -
        word -
        Group(OneOrMore(Group(integer + QuotedString()))) -
        scolon)
    choice.setName(CHOICE)

    value_table = Group(
        Keyword(VALUE_TABLE) -
        word -
        Group(OneOrMore(Group(integer + QuotedString()))) -
        scolon)
    value_table.setName(VALUE_TABLE)

    signal_type = Group(
        Keyword(SIGNAL_TYPE) -
        frame_id -
        word -
        colon -
        positive_integer -
        scolon)
    signal_type.setName(SIGNAL_TYPE)

    signal_multiplexer_values = Group(
        Keyword(SIGNAL_MULTIPLEXER_VALUES) -
        frame_id -
        word -
        word -
        Group(delimitedList(positive_integer -
                            Suppress('-') -
                            Suppress(positive_integer))) -
        scolon)
    signal_multiplexer_values.setName(SIGNAL_MULTIPLEXER_VALUES)

    message_add_sender = Group(
        Keyword(MESSAGE_TX_NODE) -
        frame_id -
        colon -
        Group(delimitedList(node)) -
        scolon)
    message_add_sender.setName(MESSAGE_TX_NODE)

    attribute_definition_rel = Group(
        Keyword(ATTRIBUTE_DEFINITION_REL) -
        (QuotedString() |
         (Keyword(NODES_REL) + QuotedString())) -
        word -
        (scolon |
         (Group(ZeroOrMore(Group(
             (comma | Empty()) + QuotedString()))) + scolon) |
         (Group(ZeroOrMore(number)) + scolon)))
    attribute_definition_rel.setName(ATTRIBUTE_DEFINITION_REL)

    attribute_definition_default_rel = Group(
        Keyword(ATTRIBUTE_DEFINITION_DEFAULT_REL) -
        QuotedString() -
        (number | QuotedString()) -
        scolon)
    attribute_definition_default_rel.setName(ATTRIBUTE_DEFINITION_DEFAULT_REL)

    attribute_rel = Group(
        Keyword(ATTRIBUTE_REL) -
        QuotedString() -
        Keyword(NODES_REL) -
        word -
        Keyword(SIGNAL) -
        frame_id -
        word -
        (positive_integer | QuotedString()) -
        scolon)
    attribute_rel.setName(ATTRIBUTE_REL)

    signal_group = Group(
        Keyword(SIGNAL_GROUP) -
        frame_id -
        word -
        integer -
        colon -
        OneOrMore(word) -
        scolon)
    signal_group.setName(SIGNAL_GROUP)

    entry = (message |
             comment |
             attribute |
             choice |
             attribute_definition |
             attribute_definition_default |
             attribute_rel |
             attribute_definition_rel |
             attribute_definition_default_rel |
             signal_group |
             event |
             message_add_sender |
             value_table |
             signal_type |
             signal_multiplexer_values |
             discard |
             nodes |
             symbols |
             version)

    # every matched frame id is converted to int up front
    frame_id.setParseAction(lambda _s, _l, t: int(t[0]))

    return OneOrMore(entry) + StringEnd()
| alinkProperty) # basic where clause simpleWhereClause = (where_ + oneOf(' '.join([ eventFields, instanceFields, signalFields, timex3Fields, tlinkFields, slinkFields, alinkFields ])).setResultsName("conditionField") + (is_ + Optional(not_.setResultsName("not_")) + alphaNums_.setResultsName("conditionValue") | state_ + is_ + Optional(not_.setResultsName("not_")) + state.setResultsName("state"))) # top-level statement definition cavatStmt << ( helpToken.setResultsName("action") + Optional(OneOrMore(alphaNums_).setResultsName("query")) | showToken.setResultsName("action") + reportType.setResultsName("report") + of_ + (tag.setResultsName("tag") + tlinkPositionedArg.setResultsName("start") + tlinkPositionedArg.setResultsName("end") + distance_.setResultsName( 'distance') + Optional(in_ + distanceUnits.setResultsName('units')) | fieldName.setResultsName("result") + Optional(simpleWhereClause.setResultsName("condition"))) + Optional(as_ + outputFormat.setResultsName("format")) | corpusToken.setResultsName("action") + ( import_.setResultsName("import_") + # can't use import as reserved word; use import_ instead. (fileName.setResultsName("directory") + to_ + alphaNums_.setResultsName("database") | alphaNums_.setResultsName("database") + from_ + fileName.setResultsName("directory"))
tokens = saludo.parseString("Hola, Mundo !") # Ahora parseamos una cadena, "Hola, Mundo!", # el metodo parseString, nos devuelve una lista con los tokens # encontrados, en caso de no haber errores... for i, token in enumerate(tokens): print("Token %d -> %s" % (i, token)) # imprimimos cada uno de los tokens Y listooo!!, he aquí a salida # Token 0 -> Hola # Token 1 -> , # Token 2-> Mundo # Token 3 -> ! # ahora cambia el parseador, aceptando saludos con mas que una sola palabra antes que ',' saludo = Group(OneOrMore(Word(alphas))) + "," + Word(alphas) + oneOf("! . ?") tokens = saludo.parseString("Hasta mañana, Mundo !") for i, token in enumerate(tokens): print("Token %d -> %s" % (i, token)) # Ahora parseamos algunas cadenas, usando el metodo runTests saludo.runTests( """\ Hola, Mundo! Hasta mañana, Mundo ! """, fullDump=False, ) # Por supuesto, se pueden "reutilizar" gramáticas, por ejemplo:
@property
def state_machine(self):
    # NOTE(review): this is a method of a class whose header lies above this
    # chunk — presumably the semantic-model builder; confirm against the
    # enclosing class.  Builds the machine from the collected initial state.
    return StateMachine(self.initial_state)

# --- state-machine DSL grammar -----------------------------------------------
# "A -- CODE -> B" defines a transition; "A -> CODE" defines an action.
_tail = Suppress('--')
_arrow = Suppress('->')
state = Word(alphas, alphanums + '_')
code = Regex('[A-Z0-9]{4}')
transition = state + _tail + code + _arrow + state
transitions = OneOrMore(transition)
action = state + _arrow + code
actions = ZeroOrMore(action)
# a script is all transitions first, then optional actions, to end of input
script = transitions + actions + StringEnd()
script.ignore(dblSlashComment)

# parse actions feed matches straight into the semantic model
model = SemanticModel()
transition.setParseAction(model.parse_transition)
action.setParseAction(model.parse_action)

filename = sys.argv[1]
with open(filename, 'r') as source:
    items = script.parseFile(source)

sm = model.state_machine
bus = NetworkBus()