"""All of the known stop conditions that we support.""" from pyparsing import Word, Regex, nums, hexnums, Literal, Optional, Group, oneOf, QuotedString, ParseException from iotile.core.exceptions import ArgumentError hex_number = (Regex(u'0x[0-9a-fA-F]+')).setParseAction(lambda s, l, t: [int(t[0], 0)]) dec_number = Word(nums).setParseAction(lambda s, l, t: [int(t[0], 10)]) number = hex_number | dec_number time_unit_multipliers = { u'second': 1, u'seconds': 1, u'minute': 60, u'minutes': 60, u'hour': 60*60, u'hours': 60*60, u'day': 60*60*24, u'days': 60*60*24, u'month': 60*60*24*30, u'months': 60*60*24*30, u'year': 60*60*24*365, u'years': 60*60*24*365, } time_unit = oneOf(u"second seconds minute minutes hour hours day days week weeks month months year years") time_interval = (number + time_unit).setParseAction(lambda s, l, t: [t[0]*time_unit_multipliers[t[1]]]) class StopCondition(object): """A condition under which the simulation should stop. Subclasses should override the one public method
class SqlGrammarMySQL(SqlGrammar): # ------------------------------------------------------------------------- # Forward declarations # ------------------------------------------------------------------------- expr = Forward() select_statement = Forward() # ------------------------------------------------------------------------- # Keywords # ------------------------------------------------------------------------- # https://dev.mysql.com/doc/refman/5.7/en/keywords.html mysql_reserved_words = """ ACCESSIBLE ADD ALL ALTER ANALYZE AND AS ASC ASENSITIVE BEFORE BETWEEN BIGINT BINARY BLOB BOTH BY CALL CASCADE CASE CHANGE CHAR CHARACTER CHECK COLLATE COLUMN CONDITION CONSTRAINT CONTINUE CONVERT CREATE CROSS CURRENT_DATE CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR DATABASE DATABASES DAY_HOUR DAY_MICROSECOND DAY_MINUTE DAY_SECOND DEC DECIMAL DECLARE DEFAULT DELAYED DELETE DESC DESCRIBE DETERMINISTIC DISTINCT DISTINCTROW DIV DOUBLE DROP DUAL EACH ELSE ELSEIF ENCLOSED ESCAPED EXISTS EXIT EXPLAIN FALSE FETCH FLOAT FLOAT4 FLOAT8 FOR FORCE FOREIGN FROM FULLTEXT GENERATED GET GRANT GROUP HAVING HIGH_PRIORITY HOUR_MICROSECOND HOUR_MINUTE HOUR_SECOND IF IGNORE IN INDEX INFILE INNER INOUT INSENSITIVE INSERT INT INT1 INT2 INT3 INT4 INT8 INTEGER INTERVAL INTO IO_AFTER_GTIDS IO_BEFORE_GTIDS IS ITERATE JOIN KEY KEYS KILL LEADING LEAVE LEFT LIKE LIMIT LINEAR LINES LOAD LOCALTIME LOCALTIMESTAMP LOCK LONG LONGBLOB LONGTEXT LOOP LOW_PRIORITY MASTER_BIND MASTER_SSL_VERIFY_SERVER_CERT MATCH MAXVALUE MEDIUMBLOB MEDIUMINT MEDIUMTEXT MIDDLEINT MINUTE_MICROSECOND MINUTE_SECOND MOD MODIFIES NATURAL NOT NO_WRITE_TO_BINLOG NULL NUMERIC ON OPTIMIZE OPTIMIZER_COSTS OPTION OPTIONALLY OR ORDER OUT OUTER OUTFILE PARTITION PRECISION PRIMARY PROCEDURE PURGE RANGE READ READS READ_WRITE REAL REFERENCES REGEXP RELEASE RENAME REPEAT REPLACE REQUIRE RESIGNAL RESTRICT RETURN REVOKE RIGHT RLIKE SCHEMA SCHEMAS SECOND_MICROSECOND SELECT SENSITIVE SEPARATOR SET SHOW SIGNAL SMALLINT SPATIAL SPECIFIC SQL 
SQLEXCEPTION SQLSTATE SQLWARNING SQL_BIG_RESULT SQL_CALC_FOUND_ROWS SQL_SMALL_RESULT SSL STARTING STORED STRAIGHT_JOIN TABLE TERMINATED THEN TINYBLOB TINYINT TINYTEXT TO TRAILING TRIGGER TRUE UNDO UNION UNIQUE UNLOCK UNSIGNED UPDATE USAGE USE USING UTC_DATE UTC_TIME UTC_TIMESTAMP VALUES VARBINARY VARCHAR VARCHARACTER VARYING VIRTUAL WHEN WHERE WHILE WITH WRITE XOR YEAR_MONTH ZEROFILL """ mysql_nonreserved_keywords = """ ACCOUNT ACTION AFTER AGAINST AGGREGATE ALGORITHM ALWAYS ANALYSE ANY ASCII AT AUTOEXTEND_SIZE AUTO_INCREMENT AVG AVG_ROW_LENGTH BACKUP BEGIN BINLOG BIT BLOCK BOOL BOOLEAN BTREE BYTE CACHE CASCADED CATALOG_NAME CHAIN CHANGED CHANNEL CHARSET CHECKSUM CIPHER CLASS_ORIGIN CLIENT CLOSE COALESCE CODE COLLATION COLUMNS COLUMN_FORMAT COLUMN_NAME COMMENT COMMIT COMMITTED COMPACT COMPLETION COMPRESSED COMPRESSION CONCURRENT CONNECTION CONSISTENT CONSTRAINT_CATALOG CONSTRAINT_NAME CONSTRAINT_SCHEMA CONTAINS CONTEXT CPU CUBE CURRENT CURSOR_NAME DATA DATAFILE DATE DATETIME DAY DEALLOCATE DEFAULT_AUTH DEFINER DELAY_KEY_WRITE DES_KEY_FILE DIAGNOSTICS DIRECTORY DISABLE DISCARD DISK DO DUMPFILE DUPLICATE DYNAMIC ENABLE ENCRYPTION END ENDS ENGINE ENGINES ENUM ERROR ERRORS ESCAPE EVENT EVENTS EVERY EXCHANGE EXECUTE EXPANSION EXPIRE EXPORT EXTENDED EXTENT_SIZE FAST FAULTS FIELDS FILE FILE_BLOCK_SIZE FILTER FIRST FIXED FLUSH FOLLOWS FORMAT FOUND FULL FUNCTION GENERAL GEOMETRY GEOMETRYCOLLECTION GET_FORMAT GLOBAL GRANTS GROUP_REPLICATION HANDLER HASH HELP HOST HOSTS HOUR IDENTIFIED IGNORE_SERVER_IDS IMPORT INDEXES INITIAL_SIZE INSERT_METHOD INSTALL INSTANCE INVOKER IO IO_THREAD IPC ISOLATION ISSUER JSON KEY_BLOCK_SIZE LANGUAGE LAST LEAVES LESS LEVEL LINESTRING LIST LOCAL LOCKS LOGFILE LOGS MASTER MASTER_AUTO_POSITION MASTER_CONNECT_RETRY MASTER_DELAY MASTER_HEARTBEAT_PERIOD MASTER_HOST MASTER_LOG_FILE MASTER_LOG_POS MASTER_PASSWORD MASTER_PORT MASTER_RETRY_COUNT MASTER_SERVER_ID MASTER_SSL MASTER_SSL_CA MASTER_SSL_CAPATH MASTER_SSL_CERT MASTER_SSL_CIPHER MASTER_SSL_CRL 
MASTER_SSL_CRLPATH MASTER_SSL_KEY MASTER_TLS_VERSION MASTER_USER MAX_CONNECTIONS_PER_HOUR MAX_QUERIES_PER_HOUR MAX_ROWS MAX_SIZE MAX_STATEMENT_TIME MAX_UPDATES_PER_HOUR MAX_USER_CONNECTIONS MEDIUM MEMORY MERGE MESSAGE_TEXT MICROSECOND MIGRATE MINUTE MIN_ROWS MODE MODIFY MONTH MULTILINESTRING MULTIPOINT MULTIPOLYGON MUTEX MYSQL_ERRNO NAME NAMES NATIONAL NCHAR NDB NDBCLUSTER NEVER NEW NEXT NO NODEGROUP NONBLOCKING NONE NO_WAIT NUMBER NVARCHAR OFFSET OLD_PASSWORD ONE ONLY OPEN OPTIONS OWNER PACK_KEYS PAGE PARSER PARSE_GCOL_EXPR PARTIAL PARTITIONING PARTITIONS PASSWORD PHASE PLUGIN PLUGINS PLUGIN_DIR POINT POLYGON PORT PRECEDES PREPARE PRESERVE PREV PRIVILEGES PROCESSLIST PROFILE PROFILES PROXY QUARTER QUERY QUICK READ_ONLY REBUILD RECOVER REDOFILE REDO_BUFFER_SIZE REDUNDANT RELAY RELAYLOG RELAY_LOG_FILE RELAY_LOG_POS RELAY_THREAD RELOAD REMOVE REORGANIZE REPAIR REPEATABLE REPLICATE_DO_DB REPLICATE_DO_TABLE REPLICATE_IGNORE_DB REPLICATE_IGNORE_TABLE REPLICATE_REWRITE_DB REPLICATE_WILD_DO_TABLE REPLICATE_WILD_IGNORE_TABLE REPLICATION RESET RESTORE RESUME RETURNED_SQLSTATE RETURNS REVERSE ROLLBACK ROLLUP ROTATE ROUTINE ROW ROWS ROW_COUNT ROW_FORMAT RTREE SAVEPOINT SCHEDULE SCHEMA_NAME SECOND SECURITY SERIAL SERIALIZABLE SERVER SESSION SHARE SHUTDOWN SIGNED SIMPLE SLAVE SLOW SNAPSHOT SOCKET SOME SONAME SOUNDS SOURCE SQL_AFTER_GTIDS SQL_AFTER_MTS_GAPS SQL_BEFORE_GTIDS SQL_BUFFER_RESULT SQL_CACHE SQL_NO_CACHE SQL_THREAD SQL_TSI_DAY SQL_TSI_HOUR SQL_TSI_MINUTE SQL_TSI_MONTH SQL_TSI_QUARTER SQL_TSI_SECOND SQL_TSI_WEEK SQL_TSI_YEAR STACKED START STARTS STATS_AUTO_RECALC STATS_PERSISTENT STATS_SAMPLE_PAGES STATUS STOP STORAGE STRING SUBCLASS_ORIGIN SUBJECT SUBPARTITION SUBPARTITIONS SUPER SUSPEND SWAPS SWITCHES TABLES TABLESPACE TABLE_CHECKSUM TABLE_NAME TEMPORARY TEMPTABLE TEXT THAN TIME TIMESTAMP TIMESTAMPADD TIMESTAMPDIFF TRANSACTION TRIGGERS TRUNCATE TYPE TYPES UNCOMMITTED UNDEFINED UNDOFILE UNDO_BUFFER_SIZE UNICODE UNINSTALL UNKNOWN UNTIL UPGRADE USER USER_RESOURCES 
USE_FRM VALIDATION VALUE VARIABLES VIEW WAIT WARNINGS WEEK WEIGHT_STRING WITHOUT WORK WRAPPER X509 XA XID XML YEAR """ mysql_keywords = sorted(list(set( mysql_reserved_words.split() + ANSI92_RESERVED_WORD_LIST.split() ))) # log.critical(mysql_keywords) keyword = make_words_regex(mysql_keywords, caseless=True, name="keyword") # ------------------------------------------------------------------------- # Comments # ------------------------------------------------------------------------- # http://dev.mysql.com/doc/refman/5.7/en/comments.html comment = (ansi_comment | bash_comment | cStyleComment) # ----------------------------------------------------------------------------- # identifier # ----------------------------------------------------------------------------- # http://dev.mysql.com/doc/refman/5.7/en/identifiers.html bare_identifier_word = make_regex_except_words( r"\b[a-zA-Z0-9$_]*\b", mysql_keywords, caseless=True, name="bare_identifier_word" ) liberal_identifier_word = make_pyparsing_regex( r"\b[a-zA-Z0-9$_]*\b", caseless=True, name="liberal_identifier_word" ) identifier = ( bare_identifier_word | QuotedString(quoteChar="`", unquoteResults=False) ).setName("identifier") liberal_identifier = ( liberal_identifier_word | QuotedString(quoteChar="`", unquoteResults=False) ).setName("liberal_identifier") # http://dev.mysql.com/doc/refman/5.7/en/charset-collate.html collation_name = identifier.copy() column_name = identifier.copy() column_alias = identifier.copy() table_name = identifier.copy() table_alias = identifier.copy() index_name = identifier.copy() function_name = identifier.copy() parameter_name = identifier.copy() database_name = identifier.copy() partition_name = identifier.copy() no_dot = NotAny('.') # MySQL allows keywords in the later parts of combined identifiers; # therefore, for example, "count.thing.thing" is not OK, but # "thing.thing.count" is. table_spec = ( Combine(database_name + '.' 
+ liberal_identifier + no_dot) | table_name + no_dot ).setName("table_spec") column_spec = ( Combine(database_name + '.' + liberal_identifier + '.' + liberal_identifier + no_dot) | Combine(table_name + '.' + liberal_identifier + no_dot) | Combine(column_name + no_dot) ).setName("column_spec") # http://dev.mysql.com/doc/refman/5.7/en/expressions.html bind_parameter = Literal('?') # http://dev.mysql.com/doc/refman/5.7/en/user-variables.html variable = Regex(r"@[a-zA-Z0-9\.$_]+").setName("variable") # http://dev.mysql.com/doc/refman/5.7/en/functions.html argument_list = ( delimitedList(expr).setName("arglist").setParseAction(', '.join) ) # ... we don't care about sub-parsing the argument list, so use combine=True # or setParseAction: http://stackoverflow.com/questions/37926516 function_call = Combine(function_name + LPAR) + argument_list + RPAR # http://dev.mysql.com/doc/refman/5.7/en/partitioning-selection.html partition_list = ( LPAR + delim_list(partition_name, combine=True) + RPAR ).setName("partition_list") # http://dev.mysql.com/doc/refman/5.7/en/index-hints.html index_list = delim_list(index_name, combine=False) # ... 
see pyparsing_bugtest_delimited_list_combine index_hint = ( ( USE + (INDEX | KEY) + Optional(FOR + (JOIN | (ORDER + BY) | (GROUP + BY))) + LPAR + Optional(index_list) + RPAR ) | ( IGNORE + (INDEX | KEY) + Optional(FOR + (JOIN | (ORDER + BY) | (GROUP + BY))) + LPAR + index_list + RPAR ) | ( FORCE + (INDEX | KEY) + Optional(FOR + (JOIN | (ORDER + BY) | (GROUP + BY))) + LPAR + index_list + RPAR ) ) index_hint_list = delim_list(index_hint, combine=True).setName( "index_hint_list") # ----------------------------------------------------------------------------- # CASE # ----------------------------------------------------------------------------- # NOT THIS: https://dev.mysql.com/doc/refman/5.7/en/case.html # THIS: https://dev.mysql.com/doc/refman/5.7/en/control-flow-functions.html#operator_case # noqa case_expr = ( ( CASE + expr + OneOrMore(WHEN + expr + THEN + expr) + Optional(ELSE + expr) + END ) | ( CASE + OneOrMore(WHEN + expr + THEN + expr) + Optional(ELSE + expr) + END ) ).setName("case_expr") # ------------------------------------------------------------------------- # MATCH # ------------------------------------------------------------------------- # https://dev.mysql.com/doc/refman/5.7/en/fulltext-search.html#function_match search_modifier = ( (IN + NATURAL + LANGUAGE + MODE + Optional(WITH + QUERY + EXPANSION)) | (IN + BOOLEAN + MODE) | (WITH + QUERY + EXPANSION) ) match_expr = ( MATCH + LPAR + delim_list(column_spec) + RPAR + AGAINST + LPAR + string_literal + Optional(search_modifier) + RPAR ).setName("match_expr") # ... don't use "expr"; MATCH AGAINST uses restricted expressions, and we # don't want it to think that "MATCH ... 
AGAINST ('+keyword' IN # BOOLEAN MODE)" resembles the IN in "WHERE something IN (SELECT ...)" # ----------------------------------------------------------------------------- # Expressions # ----------------------------------------------------------------------------- # http://dev.mysql.com/doc/refman/5.7/en/expressions.html # https://pyparsing.wikispaces.com/file/view/select_parser.py # http://dev.mysql.com/doc/refman/5.7/en/operator-precedence.html expr_term = ( INTERVAL + expr + time_unit | # "{" + identifier + expr + "}" | # see MySQL notes; antique ODBC syntax # noqa Optional(EXISTS) + LPAR + select_statement + RPAR | # ... e.g. mycol = EXISTS(SELECT ...) # ... e.g. mycol IN (SELECT ...) LPAR + delim_list(expr) + RPAR | # ... e.g. mycol IN (1, 2, 3) -- "(1, 2, 3)" being a term here case_expr | match_expr | bind_parameter | variable | function_call | literal_value | column_spec # not just identifier ) UNARY_OP, BINARY_OP, TERNARY_OP = 1, 2, 3 expr << infixNotation(expr_term, [ # https://pythonhosted.org/pyparsing/ # Having lots of operations in the list here SLOWS IT DOWN A LOT. # Just combine them into an ordered list. (BINARY | COLLATE | oneOf('! - + ~'), UNARY_OP, opAssoc.RIGHT), ( ( oneOf('^ * / %') | DIV | MOD | oneOf('+ - << >> & | = <=> >= > <= < <> !=') | (IS + Optional(NOT)) | LIKE | REGEXP | (Optional(NOT) + IN) | (SOUNDS + LIKE) ), # RNC; presumably at same level as LIKE BINARY_OP, opAssoc.LEFT ), ((BETWEEN, AND), TERNARY_OP, opAssoc.LEFT), # CASE handled above (hoping precedence is not too much of a problem) (NOT, UNARY_OP, opAssoc.RIGHT), (AND | '&&' | XOR | OR | '||' | ':=', BINARY_OP, opAssoc.LEFT), ], lpar=LPAR, rpar=RPAR) # ignores LIKE [ESCAPE] # ------------------------------------------------------------------------- # SELECT # ------------------------------------------------------------------------- compound_operator = UNION + Optional(ALL | DISTINCT) # no INTERSECT or EXCEPT in MySQL? 
ordering_term = ( expr + Optional(COLLATE + collation_name) + Optional(ASC | DESC) ) # ... COLLATE can appear in lots of places; # http://dev.mysql.com/doc/refman/5.7/en/charset-collate.html join_constraint = Optional(Group( # join_condition in MySQL grammar (ON + expr) | (USING + LPAR + delim_list(column_name) + RPAR) )) # http://dev.mysql.com/doc/refman/5.7/en/join.html join_op = Group( COMMA | STRAIGHT_JOIN | NATURAL + (Optional(LEFT | RIGHT) + Optional(OUTER)) + JOIN | (INNER | CROSS) + JOIN | Optional(LEFT | RIGHT) + Optional(OUTER) + JOIN # ignores antique ODBC "{ OJ ... }" syntax ) join_source = Forward() single_source = ( ( table_spec.copy().setResultsName("from_tables", listAllMatches=True) + Optional(PARTITION + partition_list) + Optional(Optional(AS) + table_alias) + Optional(index_hint_list) ) | (select_statement + Optional(AS) + table_alias) + (LPAR + join_source + RPAR) ) join_source << Group( single_source + ZeroOrMore(join_op + single_source + join_constraint) )("join_source") # ... must have a Group to append to it later, it seems # ... but name it "join_source" here, or it gets enclosed in a further list # when you name it later aggregate_function = ( # https://dev.mysql.com/doc/refman/5.7/en/group-by-functions.html AVG | BIT_AND | BIT_OR | BIT_XOR | COUNT | # also: special handling for COUNT(DISTINCT ...), see below GROUP_CONCAT | MAX | MIN | STD | STDDEV | STDDEV_POP | STDDEV_SAMP | SUM | VAR_POP | VAR_SAMP | VARIANCE ) result_base = ( # Aggregate functions: e.g. "MAX(" allowed, "MAX (" not allowed Combine(COUNT + LPAR) + '*' + RPAR | # special aggregate function Combine(COUNT + LPAR) + DISTINCT + expr + RPAR | # special aggregate function # noqa Combine(aggregate_function + LPAR) + expr + RPAR | expr | '*' | Combine(table_name + '.' 
+ '*') | column_spec | literal_value ) result_column = ( result_base + Optional(Optional(AS) + column_alias) ).setResultsName("select_columns", listAllMatches=True) # ------------------------------------------------------------------------- # SELECT # ------------------------------------------------------------------------- # http://dev.mysql.com/doc/refman/5.7/en/select.html """ SELECT [ALL | DISTINCT | DISTINCTROW ] [HIGH_PRIORITY] [MAX_STATEMENT_TIME = N] [STRAIGHT_JOIN] [SQL_SMALL_RESULT] [SQL_BIG_RESULT] [SQL_BUFFER_RESULT] [SQL_CACHE | SQL_NO_CACHE] [SQL_CALC_FOUND_ROWS] select_expr [, select_expr ...] [FROM table_references [PARTITION partition_list] [WHERE where_condition] [GROUP BY {col_name | expr | position} [ASC | DESC], ... [WITH ROLLUP]] [HAVING where_condition] [ORDER BY {col_name | expr | position} [ASC | DESC], ...] [LIMIT {[offset,] row_count | row_count OFFSET offset}] ... ignore below here... [PROCEDURE procedure_name(argument_list)] [INTO OUTFILE 'file_name' [CHARACTER SET charset_name] export_options | INTO DUMPFILE 'file_name' | INTO var_name [, var_name]] [FOR UPDATE | LOCK IN SHARE MODE]] """ where_expr = Group(expr).setResultsName("where_expr") where_clause = Group( Optional(WHERE + where_expr) ).setResultsName("where_clause") select_core = ( SELECT + Group(Optional(ALL | DISTINCT | DISTINCTROW))("select_specifier") + Optional(HIGH_PRIORITY) + Optional(MAX_STATEMENT_TIME + '=' + integer) + Optional(STRAIGHT_JOIN) + Optional(SQL_SMALL_RESULT) + Optional(SQL_BIG_RESULT) + Optional(SQL_BUFFER_RESULT) + Optional(SQL_CACHE | SQL_NO_CACHE) + Optional(SQL_CALC_FOUND_ROWS) + Group(delim_list(result_column))("select_expression") + Optional( FROM + join_source + Optional(PARTITION + partition_list) + where_clause + Optional( GROUP + BY + delim_list(ordering_term + Optional(ASC | DESC))("group_by_term") + Optional(WITH + ROLLUP) ) + Optional(HAVING + expr("having_expr")) ) ) select_statement << ( select_core + ZeroOrMore(compound_operator + 
select_core) + Optional( ORDER + BY + delim_list(ordering_term + Optional(ASC | DESC))("order_by_terms") ) + Optional(LIMIT + ( (Optional(integer("offset") + COMMA) + integer("row_count")) | (integer("row_count") + OFFSET + integer("offset")) )) + # PROCEDURE ignored # rest ignored Optional(';') ) select_statement.ignore(comment) # http://dev.mysql.com/doc/refman/5.7/en/identifiers.html # ... approximately (and conservatively): MYSQL_INVALID_FIRST_IF_UNQUOTED = re.compile(r"[^a-zA-Z_$]") MYSQL_INVALID_IF_UNQUOTED = re.compile(r"[^a-zA-Z0-9_$]") def __init__(self): super().__init__() @classmethod def quote_identifier(cls, identifier: str) -> str: return "`{}`".format(identifier) @classmethod def is_quoted(cls, identifier: str) -> bool: return identifier.startswith("`") and identifier.endswith("`") @classmethod def requires_quoting(cls, identifier: str) -> bool: assert identifier, "Empty identifier" if cls.MYSQL_INVALID_IF_UNQUOTED.search(identifier): return True firstchar = identifier[0] if cls.MYSQL_INVALID_FIRST_IF_UNQUOTED.search(firstchar): return True if identifier.upper() in cls.mysql_keywords: return True return False @classmethod def get_grammar(cls): # Grammar (here, just SELECT) return cls.select_statement @classmethod def get_column_spec(cls): return cls.column_spec @classmethod def get_result_column(cls): return cls.result_column @classmethod def get_join_op(cls): return cls.join_op @classmethod def get_table_spec(cls): return cls.table_spec @classmethod def get_join_constraint(cls): return cls.join_constraint @classmethod def get_select_statement(cls): return cls.select_statement @classmethod def get_expr(cls): return cls.expr @classmethod def get_where_clause(cls): return cls.where_clause @classmethod def get_where_expr(cls): return cls.where_expr @classmethod def test_dialect_specific_1(cls): log.info("Testing MySQL-specific aspects (1/2)...") test_fail(cls.case_expr, "one two three four") test_fail(cls.match_expr, "one two three four") 
test_fail(cls.bind_parameter, "one two three four") test_fail(cls.variable, "one two three four") test_fail(cls.function_call, "one two three four") test_fail(literal_value, "one two three four") # test_fail(cls.column_spec, "one two three four") # matches "one" @classmethod def test_dialect_specific_2(cls): log.info("Testing MySQL-specific aspects (2/2)...") log.info("Testing expr") test_succeed(cls.expr, "a DIV b") test_succeed(cls.expr, "a MOD b") log.info("Testing quoted identifiers") test_succeed(cls.identifier, "`a`") test_succeed(cls.identifier, "`FROM`") test_succeed(cls.identifier, "`SELECT FROM`") # MySQL uses up to: schema.table.column test_succeed(cls.table_spec, "mydb.`my silly table`") test_succeed(cls.table_spec, "myschema.mytable") test_fail(cls.table_spec, "mydb.myschema.mytable") # ... but not 4: test_succeed(cls.column_spec, "`my silly table`.`my silly column`") test_succeed(cls.column_spec, "myschema.mytable.mycol") test_succeed(cls.column_spec, "starfeeder.mass_event.thing") test_succeed(cls.column_spec, "starfeeder.mass_event.at") test_fail(cls.column_spec, "mydb.myschema.mytable.mycol") log.info("Testing variable") test_succeed(cls.variable, "@myvar") log.info("Testing argument_list") test_succeed(cls.argument_list, "@myvar, 5") log.info("Testing function_call") test_succeed(cls.function_call, "myfunc(@myvar, 5)") log.info("Testing index_list") test_succeed(cls.index_list, "idx1, idx2") log.info("Testing index_hint") test_succeed(cls.index_hint, "USE INDEX FOR JOIN (idx1, idx2)") log.info("Testing case_expr") test_succeed(cls.case_expr, """ CASE v WHEN 2 THEN x WHEN 3 THEN y ELSE -99 END """) log.info("Testing match_expr") test_succeed(cls.match_expr, """ MATCH (content_field) AGAINST('+keyword1 +keyword2') """) test_succeed(cls.match_expr, """ MATCH (content_field) AGAINST('+keyword1 +keyword2' IN BOOLEAN MODE) """)
# --- Punctuation tokens (suppressed: they delimit but carry no value) -------
# NOTE(review): `L` is presumably pyparsing.Literal and ALPHANUM a
# letters/digits token, both defined earlier in the file -- confirm
# against the full source.
LBRACKET = L("[").suppress()
RBRACKET = L("]").suppress()
LPAREN = L("(").suppress()
RPAREN = L(")").suppress()
COMMA = L(",").suppress()
SEMICOLON = L(";").suppress()
AT = L("@").suppress()

# Identifiers: alphanumeric runs optionally joined by '-', '_' or '.';
# punctuation may appear between runs but cannot end the identifier.
PUNCTUATION = Word("-_.")
IDENTIFIER_END = ALPHANUM | (ZeroOrMore(PUNCTUATION) + ALPHANUM)
IDENTIFIER = Combine(ALPHANUM + ZeroOrMore(IDENTIFIER_END))

NAME = IDENTIFIER("name")  # the distribution name
EXTRA = IDENTIFIER         # a single extra name

# Direct-reference URL: "@ <uri>"; the URI is any run of non-space chars.
URI = Regex(r"[^ ]+")("url")
URL = AT + URI

# Bracketed, comma-separated (possibly empty) list of extras, e.g. "[a,b]".
EXTRAS_LIST = EXTRA + ZeroOrMore(COMMA + EXTRA)
EXTRAS = (LBRACKET + Optional(EXTRAS_LIST) + RBRACKET)("extras")

# Version specifiers: reuse the regex source strings from the packaging
# library's Specifier / LegacySpecifier classes.
VERSION_PEP440 = Regex(Specifier._regex_str, re.VERBOSE | re.IGNORECASE)
VERSION_LEGACY = Regex(LegacySpecifier._regex_str, re.VERBOSE | re.IGNORECASE)

# '^' is pyparsing's longest-match Or: prefer whichever alternative
# consumes more input.
VERSION_ONE = VERSION_PEP440 ^ VERSION_LEGACY
# One or more comma-separated specifiers, re-joined into a single
# comma-delimited string under the results name "_raw_spec".
VERSION_MANY = Combine(
    VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE),
    joinString=",", adjacent=False)("_raw_spec")
# Optional version spec, with or without enclosing parentheses; the parse
# action collapses it to the raw spec string (or "" when absent).
_VERSION_SPEC = Optional(((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY))
_VERSION_SPEC.setParseAction(lambda s, l, t: t._raw_spec or "")
# Comparison between two sub-expressions, e.g. "A1 > 5".
# NOTE(review): `expr` is a pyparsing Forward, and COMPARISON_OP, LPAR,
# RPAR, cellRange and cellRef are defined earlier in the file -- not
# visible in this chunk; confirm against the full source.
condExpr = expr + COMPARISON_OP + expr

# IF(condition, value_if_true, value_if_false)
ifFunc = (CaselessKeyword("if")
          + LPAR
          + Group(condExpr)("condition")
          + COMMA + expr("if_true")
          + COMMA + expr("if_false")
          + RPAR)

# Factory for variadic statistical functions: NAME(expr [, expr ...]).
statFunc = lambda name: CaselessKeyword(name) + LPAR + delimitedList(expr) + RPAR
sumFunc = statFunc("sum")
minFunc = statFunc("min")
maxFunc = statFunc("max")
aveFunc = statFunc("ave")
funcCall = ifFunc | sumFunc | minFunc | maxFunc | aveFunc

# Arithmetic with conventional precedence: '*' and '/' bind tighter
# than '+' and '-'; all are left-associative binary operators.
multOp = oneOf("* /")
addOp = oneOf("+ -")
numericLiteral = Regex(r"\-?\d+(\.\d+)?")
operand = numericLiteral | funcCall | cellRange | cellRef
arithExpr = operatorPrecedence(operand, [
    (multOp, 2, opAssoc.LEFT),
    (addOp, 2, opAssoc.LEFT),
])

# Text expressions: string concatenation with '&'.
textOperand = dblQuotedString | cellRef
textExpr = operatorPrecedence(textOperand, [
    ('&', 2, opAssoc.LEFT),
])

# Close the Forward: a formula body is either arithmetic or text.
expr << (arithExpr | textExpr)

# Sample inputs (leading '=' as typed into a spreadsheet cell).
test1 = "=3*A7+5"
test2 = "=3*Sheet1!$A$7+5"
test2a = "=3*'Sheet 1'!$A$7+5"
from pyparsing import LineEnd, LineStart, SkipTo, Regex

from regparser.grammar import atomic, unified

# Grammar for header lines that introduce a new node in the regulation
# tree. Each alternative anchors on its marker token (leading whitespace
# is significant, hence leaveWhitespace()) and consumes the rest of the
# header line.

# Section header: section marker + part/section citation + rest of line.
section = (atomic.section_marker.copy().leaveWhitespace()
           + unified.part_section
           + SkipTo(LineEnd()))

# Paragraph header keyed off a bare section number and a depth-1 paragraph.
par = (atomic.section.copy().leaveWhitespace()
       + unified.depth1_p
       + SkipTo(LineEnd()))

# Explicit paragraph marker followed by section and depth-1 paragraph.
marker_par = (atomic.paragraph_marker.copy().leaveWhitespace()
              + atomic.section
              + unified.depth1_p)

# This matches an appendix name in an appendix header. Here we'll match
# something with a dash in the appendix name (i.e. AA-1) but we'll
# remove the dash. The effect of this is that, for label purposes only,
# the appendix becomes known as 'AA1', and therefore we don't have weird
# label collisions with a node labeled '1' underneath the appendix.
# Fix: setResultsName("appendix") was previously applied twice (once
# before and once after setParseAction); applying it once is sufficient
# and produces the same "appendix" result key.
appendix = (
    atomic.appendix_marker.copy().leaveWhitespace()
    + Regex(r"[A-Z]+-?[0-9]*\b")
    .setParseAction(lambda r: r[0].replace('-', ''))
    .setResultsName("appendix")
    + SkipTo(LineEnd()))

# A header must start at the beginning of a line and be one of the above.
parser = LineStart() + (section | marker_par | par | appendix)
else: res += [b, rdflib.RDF.first, x] res += [b, rdflib.RDF.rest, rdflib.RDF.nil] res += other if DEBUG: print "CollectionOut", res return [res] # SPARQL Grammar from http://www.w3.org/TR/sparql11-query/#grammar # ------ TERMINALS -------------- # [139] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20])* '>' IRIREF = Combine( Suppress('<') + Regex(r'[^<>"{}|^`\\%s]*' % ''.join('\\x%02X' % i for i in range(33))) + Suppress('>')) IRIREF.setParseAction(lambda x: rdflib.URIRef(x[0])) # [164] P_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] if sys.maxunicode == 0xffff: # this is narrow python build (default on windows/osx) # this means that unicode code points over 0xffff are stored # as several characters, which in turn means that regex character # ranges with these characters do not work. # See # * http://bugs.python.org/issue12729 # * http://bugs.python.org/issue12749 # * http://bugs.python.org/issue3665 #
FollowedBy,empty __all__ = ['tapOutputParser', 'TAPTest', 'TAPSummary'] # newlines are significant whitespace, so set default skippable # whitespace to just spaces and tabs ParserElement.setDefaultWhitespaceChars(" \t") NL = LineEnd().suppress() integer = Word(nums) plan = '1..' + integer("ubound") OK, NOT_OK = map(Literal, ['ok', 'not ok']) testStatus = (OK | NOT_OK) description = Regex("[^#\n]+") description.setParseAction(lambda t: t[0].lstrip('- ')) TODO, SKIP = map(CaselessLiteral, 'TODO SKIP'.split()) directive = Group( Suppress('#') + (TODO + restOfLine | FollowedBy(SKIP) + restOfLine.copy().setParseAction(lambda t: ['SKIP', t[0]]))) commentLine = Suppress("#") + empty + restOfLine testLine = Group( Optional(OneOrMore(commentLine + NL))("comments") + testStatus("passed") + Optional(integer)("testNumber") + Optional(description)("description") + Optional(directive)("directive")) bailLine = Group(
from pyparsing import Regex, OneOrMore, delimitedList, LineEnd, Group

# A cell is either a bare run of characters containing no quote, comma,
# or newline, or a double-quoted string (which may itself hold commas).
_bare_cell = Regex(r'[^",\n\r]+')
_quoted_cell = Regex(r'"[^"]*"')
cell = _bare_cell | _quoted_cell

# A row is a comma-separated list of cells; the terminating newline is
# matched but suppressed from the parse results.
_row_end = LineEnd().suppress()
row = delimitedList(cell, delim=",") + _row_end

# The header is structurally identical to a data row; it only gets its
# own name for nicer diagnostics.
header = row.copy().setName('header')

# A CSV document: one grouped header followed by a group of data rows,
# each data row grouped individually.
csv = Group(header) + Group(OneOrMore(Group(row)))
def add_id(s, loc, tok):
    # Parse action: record a parsed node id.
    # NOTE(review): `ids` is a module-level list defined before this
    # chunk -- confirm against the full source.
    ids.append( tok[0] )

def add_size(s, loc, tok):
    # Parse action: record a node's (width, height) group (tok[1]).
    # NOTE(review): `sizes` is a module-level list defined before this
    # chunk -- confirm against the full source.
    sizes.append( tok[1] )

def add_coordinate(s, loc, tok):
    # Parse action: record a placement pair (tok[1]) as a tuple.
    # NOTE(review): `coordinates` is a module-level list defined before
    # this chunk -- confirm against the full source.
    coordinates.append( tuple(tok[1]) )

# ---------------------------------------------------------------------------
# Common rules (UCLA / Bookshelf placement benchmark format)
# ---------------------------------------------------------------------------
header = Regex(r"^UCLA.*")                    # file banner line
comment = Regex(r"#.*")                       # '#' comment line
node_id = Word(alphanums ).setParseAction( add_id )

# ---------------------------------------------------------------------------
# .nodes file rules
# ---------------------------------------------------------------------------
num_nodes = Regex(r"NumNodes.*" )             # "NumNodes : N" line
num_terminals = Regex(r"NumTerminals.*")      # "NumTerminals : N" line
# width/height: integer tokens, converted from string to int.
width = Word(nums).setParseAction(lambda tokens : int(tokens[0]))
height = Word(nums).setParseAction(lambda tokens : int(tokens[0]))
# One node line: "<id> <width> <height> [terminal]".
node = ( node_id + Group(width + height) + Optional(Literal("terminal")) ).setParseAction(add_size)
node_grammar = header + ZeroOrMore(comment) + num_nodes + num_terminals + OneOrMore(node)
#========================================================pl rule======================================================# ########################################################################################################################### x = Word(nums).setParseAction(lambda tokens : int(tokens[0])).setResultsName("x") #
def parse(content, basedir=None, resolve=True):
    """parse a HOCON content

    :param content: HOCON content to parse
    :type content: basestring
    :param basedir: directory used to resolve relative ``include`` paths
    :param resolve: If true, resolve substitutions
    :type resolve: boolean
    :return: a ConfigTree or a list
    """

    def norm_string(value):
        # Apply the global escape-sequence replacement table to a raw string.
        for k, v in ConfigParser.REPLACEMENTS.items():
            value = value.replace(k, v)
        return value

    def unescape_string(tokens):
        return ConfigUnquotedString(norm_string(tokens[0]))

    def parse_multi_string(tokens):
        # remove the first and last 3 "
        return tokens[0][3:-3]

    def convert_number(tokens):
        # Prefer int; fall back to float for values like "1.5" or "1e3".
        n = tokens[0]
        try:
            return int(n)
        except ValueError:
            return float(n)

    # ${path} or ${?path} for optional substitution
    SUBSTITUTION_PATTERN = r"\$\{(?P<optional>\?)?(?P<variable>[^}]+)\}(?P<ws>[ \t]*)"

    def create_substitution(instring, loc, token):
        # remove the ${ and }
        match = re.match(SUBSTITUTION_PATTERN, token[0])
        variable = match.group('variable')
        ws = match.group('ws')
        optional = match.group('optional') == '?'
        substitution = ConfigSubstitution(variable, optional, ws, instring, loc)
        return substitution

    # quoted string with optional trailing whitespace captured
    STRING_PATTERN = r'(")(?P<value>[^"]*)\1(?P<ws>[ \t]*)'

    def create_quoted_string(instring, loc, token):
        # remove the surrounding quotes and normalise escapes
        match = re.match(STRING_PATTERN, token[0])
        value = norm_string(match.group('value'))
        ws = match.group('ws')
        return ConfigQuotedString(value, ws, instring, loc)

    def include_config(instring, loc, token):
        """Handle `include "x"`, `include url(...)`, `include file(...)`,
        each optionally wrapped in `required(...)`."""
        url = None
        file = None
        required = False
        if token[0] == 'required':
            required = True
            final_tokens = token[1:]
        else:
            final_tokens = token

        if len(final_tokens) == 1:  # include "test"
            value = final_tokens[0].value if isinstance(
                final_tokens[0], ConfigQuotedString) else final_tokens[0]
            if value.startswith("http://") or value.startswith(
                    "https://") or value.startswith("file://"):
                url = value
            else:
                file = value
        elif len(final_tokens) == 2:  # include url("test") or file("test")
            # BUG FIX: the original tested isinstance(token[1], ...), which is
            # wrong when a leading 'required' shifted the tokens — the quoted
            # string then leaked through unwrapped. Test final_tokens[1].
            value = final_tokens[1].value if isinstance(
                final_tokens[1], ConfigQuotedString) else final_tokens[1]
            if final_tokens[0] == 'url':
                url = value
            else:
                file = value

        if url is not None:
            logger.debug('Loading config from url %s', url)
            obj = ConfigFactory.parse_URL(url, resolve=False, required=required)
        elif file is not None:
            path = file if basedir is None else os.path.join(basedir, file)
            logger.debug('Loading config from file %s', path)
            obj = ConfigFactory.parse_file(path, resolve=False, required=required)
        else:
            raise ConfigException(
                'No file or URL specified at: {loc}: {instring}',
                loc=loc, instring=instring)
        return ConfigInclude(obj if isinstance(obj, list) else obj.items())

    # Newlines are significant in HOCON, so do not skip them as whitespace.
    ParserElement.setDefaultWhitespaceChars(' \t')

    assign_expr = Forward()
    true_expr = Keyword("true", caseless=True).setParseAction(replaceWith(True))
    false_expr = Keyword("false", caseless=True).setParseAction(replaceWith(False))
    null_expr = Keyword("null", caseless=True).setParseAction(
        replaceWith(NoneValue()))
    key = QuotedString(
        '"', escChar='\\', unquoteResults=False) | Word(alphanums + alphas8bit + '._- ')

    eol = Word('\n\r').suppress()
    eol_comma = Word('\n\r,').suppress()
    comment = (Literal('#') | Literal('//')) - SkipTo(eol | StringEnd())
    comment_eol = Suppress(Optional(eol_comma) + comment)
    comment_no_comma_eol = (comment | eol).suppress()
    # A number must be followed by end-of-value context, otherwise it is an
    # unquoted string (e.g. "1.2.3").
    number_expr = Regex(
        r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE]\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))',
        re.DOTALL).setParseAction(convert_number)

    # multi line string using """
    # Using fix described in http://pyparsing.wikispaces.com/share/view/3778969
    multiline_string = Regex(
        '""".*?"*"""', re.DOTALL | re.UNICODE).setParseAction(parse_multi_string)
    # single quoted line string
    quoted_string = Regex(
        '".*?"[ \t]*', re.UNICODE).setParseAction(create_quoted_string)
    # unquoted string that takes the rest of the line until an optional comment
    # we support .properties multiline support which is like this:
    # line1 \
    # line2 \
    # so a backslash precedes the \n
    unquoted_string = Regex(
        r'(?:\\|[^\[\{\s\]\}#,=\$])+[ \t]*').setParseAction(unescape_string)
    substitution_expr = Regex(
        r'[ \t]*\$\{[^\}]+\}[ \t]*').setParseAction(create_substitution)
    string_expr = multiline_string | quoted_string | unquoted_string

    value_expr = number_expr | true_expr | false_expr | null_expr | string_expr

    include_content = (
        quoted_string | ((Keyword('url') | Keyword('file')) -
                         Literal('(').suppress() - quoted_string - Literal(')').suppress()))
    include_expr = (
        Keyword("include", caseless=True).suppress() +
        (include_content |
         (Keyword("required") - Literal('(').suppress() -
          include_content - Literal(')').suppress()))).setParseAction(include_config)

    root_dict_expr = Forward()
    dict_expr = Forward()
    list_expr = Forward()
    multi_value_expr = ZeroOrMore(
        comment_eol | include_expr | substitution_expr | dict_expr |
        list_expr | value_expr | (Literal('\\') - eol).suppress())
    # for a dictionary : or = is optional
    # last zeroOrMore is because we can have t = {a:4} {b: 6} {c: 7} which is
    # dictionary concatenation
    inside_dict_expr = ConfigTreeParser(
        ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma))
    inside_root_dict_expr = ConfigTreeParser(
        ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma),
        root=True)
    dict_expr << Suppress('{') - inside_dict_expr - Suppress('}')
    root_dict_expr << Suppress('{') - inside_root_dict_expr - Suppress('}')
    list_entry = ConcatenatedValueParser(multi_value_expr)
    list_expr << Suppress('[') - ListParser(
        list_entry - ZeroOrMore(eol_comma - list_entry)) - Suppress(']')

    # special case when we have a value assignment where the string can
    # potentially be the remainder of the line
    assign_expr << Group(
        key - ZeroOrMore(comment_no_comma_eol) -
        (dict_expr |
         (Literal('=') | Literal(':') | Literal('+=')) -
         ZeroOrMore(comment_no_comma_eol) -
         ConcatenatedValueParser(multi_value_expr)))

    # the file can be { ... } where {} can be omitted or []
    config_expr = ZeroOrMore(comment_eol | eol) + (
        list_expr | root_dict_expr | inside_root_dict_expr) + ZeroOrMore(
        comment_eol | eol_comma)
    config = config_expr.parseString(content, parseAll=True)[0]
    if resolve:
        ConfigParser.resolve_substitutions(config)
    return config
def get_symptom(self): loglevel = LineStart() + Word(nums) analyze_expression = Combine( Regex(".*Exception:") + SkipTo(Suppress(loglevel), include=True)) return analyze_expression
def Verilog_BNF():
    """Build (once) and return the pyparsing grammar for Verilog source.

    The grammar is cached in the module-level `verilogbnf` and only
    constructed on the first call.
    """
    global verilogbnf
    if verilogbnf is None:
        # compiler directives
        compilerDirective = Combine(
            "`" +
            oneOf("define undef ifdef else endif default_nettype "
                  "include resetall timescale unconnected_drive "
                  "nounconnected_drive celldefine endcelldefine") +
            restOfLine).setName("compilerDirective")

        # primitives
        SEMI, COLON, LPAR, RPAR, LBRACE, RBRACE, LBRACK, RBRACK, DOT, COMMA, EQ = map(Literal, ";:(){}[].,=")

        identLead = alphas + "$_"
        identBody = alphanums + "$_"
        identifier1 = Regex(r"\.?[" + identLead + "][" + identBody + r"]*(\.[" + identLead + "][" + identBody + "]*)*"
                            ).setName("baseIdent")
        # escaped identifier: backslash then non-space chars; strip the backslash
        identifier2 = Regex(r"\\\S+").setParseAction(lambda t: t[0][1:]).setName("escapedIdent")  # .setDebug()
        identifier = identifier1 | identifier2
        # sanity check: ParserElement == str runs a match (stripped under -O)
        assert(identifier2 == r'\abc')

        hexnums = nums + "abcdefABCDEF" + "_?"
        base = Regex("'[bBoOdDhH]").setName("base")
        basedNumber = Combine(Optional(Word(nums + "_")) + base + Word(hexnums + "xXzZ"),
                              joinString=" ", adjacent=False).setName("basedNumber")
        number = (basedNumber |
                  Regex(r"[+-]?[0-9_]+(\.[0-9_]*)?([Ee][+-]?[0-9_]+)?")
                  ).setName("numeric")

        expr = Forward().setName("expr")
        concat = Group(LBRACE + delimitedList(expr) + RBRACE)
        multiConcat = Group("{" + expr + concat + "}").setName("multiConcat")
        funcCall = Group(identifier + LPAR + Optional(delimitedList(expr)) + RPAR).setName("funcCall")

        subscrRef = Group(LBRACK + delimitedList(expr, COLON) + RBRACK)
        subscrIdentifier = Group(identifier + Optional(subscrRef))
        scalarConst = Regex("0|1('[Bb][01xX])?")
        mintypmaxExpr = Group(expr + COLON + expr + COLON + expr).setName("mintypmax")
        primary = (number |
                   (LPAR + mintypmaxExpr + RPAR) |
                   (LPAR + Group(expr) + RPAR).setName("nestedExpr") |
                   multiConcat |
                   concat |
                   dblQuotedString |
                   funcCall |
                   subscrIdentifier)

        unop = oneOf("+ - ! ~ & ~& | ^| ^ ~^").setName("unop")
        binop = oneOf("+ - * / % == != === !== && "
                      "|| < <= > >= & | ^ ^~ >> << ** <<< >>>").setName("binop")

        expr << ((unop + expr) |  # must be first!
                 (primary + "?" + expr + COLON + expr) |
                 (primary + Optional(binop + expr)))

        lvalue = subscrIdentifier | concat

        # keywords
        if_ = Keyword("if")
        else_ = Keyword("else")
        edge = Keyword("edge")
        posedge = Keyword("posedge")
        negedge = Keyword("negedge")
        specify = Keyword("specify")
        endspecify = Keyword("endspecify")
        fork = Keyword("fork")
        join = Keyword("join")
        begin = Keyword("begin")
        end = Keyword("end")
        default = Keyword("default")
        forever = Keyword("forever")
        repeat = Keyword("repeat")
        while_ = Keyword("while")
        for_ = Keyword("for")
        case = oneOf("case casez casex")
        endcase = Keyword("endcase")
        wait = Keyword("wait")
        disable = Keyword("disable")
        deassign = Keyword("deassign")
        force = Keyword("force")
        release = Keyword("release")
        assign = Keyword("assign")

        eventExpr = Forward()
        eventTerm = (posedge + expr) | (negedge + expr) | expr | (LPAR + eventExpr + RPAR)
        eventExpr << (Group(delimitedList(eventTerm, Keyword("or"))))
        eventControl = Group("@" + ((LPAR + eventExpr + RPAR) | identifier | "*")).setName("eventCtrl")

        delayArg = (number |
                    Word(alphanums + "$_") |  # identifier |
                    (LPAR + Group(delimitedList(mintypmaxExpr | expr)) + RPAR)
                    ).setName("delayArg")  # .setDebug()
        delay = Group("#" + delayArg).setName("delay")  # .setDebug()
        delayOrEventControl = delay | eventControl

        assgnmt = Group(lvalue + EQ + Optional(delayOrEventControl) + expr).setName("assgnmt")
        nbAssgnmt = Group((lvalue + "<=" + Optional(delay) + expr) |
                          (lvalue + "<=" + Optional(eventControl) + expr)).setName("nbassgnmt")

        range = LBRACK + expr + COLON + expr + RBRACK

        paramAssgnmt = Group(identifier + EQ + expr).setName("paramAssgnmt")
        parameterDecl = Group("parameter" + Optional(range) + delimitedList(paramAssgnmt) + SEMI).setName("paramDecl")

        inputDecl = Group("input" + Optional(range) + delimitedList(identifier) + SEMI)
        outputDecl = Group("output" + Optional(range) + delimitedList(identifier) + SEMI)
        inoutDecl = Group("inout" + Optional(range) + delimitedList(identifier) + SEMI)

        regIdentifier = Group(identifier + Optional(LBRACK + expr + COLON + expr + RBRACK))
        regDecl = Group("reg" + Optional("signed") + Optional(range) +
                        delimitedList(regIdentifier) + SEMI).setName("regDecl")
        timeDecl = Group("time" + delimitedList(regIdentifier) + SEMI)
        integerDecl = Group("integer" + delimitedList(regIdentifier) + SEMI)

        strength0 = oneOf("supply0 strong0 pull0 weak0 highz0")
        strength1 = oneOf("supply1 strong1 pull1 weak1 highz1")
        driveStrength = Group(LPAR + ((strength0 + COMMA + strength1) |
                                      (strength1 + COMMA + strength0)) + RPAR).setName("driveStrength")
        nettype = oneOf("wire tri tri1 supply0 wand triand tri0 supply1 wor trior trireg")
        expandRange = Optional(oneOf("scalared vectored")) + range
        realDecl = Group("real" + delimitedList(identifier) + SEMI)

        eventDecl = Group("event" + delimitedList(identifier) + SEMI)

        blockDecl = (parameterDecl | regDecl | integerDecl | realDecl | timeDecl | eventDecl)

        stmt = Forward().setName("stmt")  # .setDebug()
        stmtOrNull = stmt | SEMI
        caseItem = (delimitedList(expr) + COLON + stmtOrNull) | \
                   (default + Optional(":") + stmtOrNull)
        # Statement alternatives follow the IEEE 1364 <statement> production.
        stmt << Group(
            (begin + Group(ZeroOrMore(stmt)) + end).setName("begin-end") |
            (if_ + Group(LPAR + expr + RPAR) + stmtOrNull + Optional(else_ + stmtOrNull)).setName("if") |
            (delayOrEventControl + stmtOrNull) |
            (case + LPAR + expr + RPAR + OneOrMore(caseItem) + endcase) |
            (forever + stmt) |
            (repeat + LPAR + expr + RPAR + stmt) |
            (while_ + LPAR + expr + RPAR + stmt) |
            (for_ + LPAR + assgnmt + SEMI + Group(expr) + SEMI + assgnmt + RPAR + stmt) |
            (fork + ZeroOrMore(stmt) + join) |
            # BUG FIX: a labeled parallel block must terminate with `join`,
            # not `end` (IEEE 1364 <par_block>: fork [: id decls] stmts join).
            (fork + COLON + identifier + ZeroOrMore(blockDecl) + ZeroOrMore(stmt) + join) |
            (wait + LPAR + expr + RPAR + stmtOrNull) |
            ("->" + identifier + SEMI) |
            (disable + identifier + SEMI) |
            (assign + assgnmt + SEMI) |
            (deassign + lvalue + SEMI) |
            (force + assgnmt + SEMI) |
            (release + lvalue + SEMI) |
            (begin + COLON + identifier + ZeroOrMore(blockDecl) +
             ZeroOrMore(stmt) + end).setName("begin:label-end") |
            # these *have* to go at the end of the list!!!
            (assgnmt + SEMI) |
            (nbAssgnmt + SEMI) |
            (Combine(Optional("$") + identifier) +
             Optional(LPAR + delimitedList(expr | empty) + RPAR) + SEMI)
        ).setName("stmtBody")

        alwaysStmt = Group("always" + Optional(eventControl) + stmt).setName("alwaysStmt")
        initialStmt = Group("initial" + stmt).setName("initialStmt")

        chargeStrength = Group(LPAR + oneOf("small medium large") + RPAR).setName("chargeStrength")

        continuousAssign = Group(assign + Optional(driveStrength) + Optional(delay) +
                                 delimitedList(assgnmt) + SEMI).setName("continuousAssign")

        tfDecl = (parameterDecl | inputDecl | outputDecl | inoutDecl | regDecl |
                  timeDecl | integerDecl | realDecl)

        functionDecl = Group("function" + Optional(range | "integer" | "real") +
                             identifier + SEMI +
                             Group(OneOrMore(tfDecl)) +
                             Group(ZeroOrMore(stmt)) +
                             "endfunction")

        inputOutput = oneOf("input output")
        netDecl1Arg = (nettype + Optional(expandRange) + Optional(delay) +
                       Group(delimitedList(~inputOutput + identifier)))
        netDecl2Arg = ("trireg" + Optional(chargeStrength) + Optional(expandRange) +
                       Optional(delay) + Group(delimitedList(~inputOutput + identifier)))
        netDecl3Arg = (nettype + Optional(driveStrength) + Optional(expandRange) +
                       Optional(delay) + Group(delimitedList(assgnmt)))
        netDecl1 = Group(netDecl1Arg + SEMI).setName("netDecl1")
        netDecl2 = Group(netDecl2Arg + SEMI).setName("netDecl2")
        netDecl3 = Group(netDecl3Arg + SEMI).setName("netDecl3")

        gateType = oneOf("and nand or nor xor xnor buf bufif0 bufif1 "
                         "not notif0 notif1 pulldown pullup nmos rnmos "
                         "pmos rpmos cmos rcmos tran rtran tranif0 "
                         "rtranif0 tranif1 rtranif1")
        gateInstance = Optional(Group(identifier + Optional(range))) + \
            LPAR + Group(delimitedList(expr)) + RPAR
        gateDecl = Group(gateType + Optional(driveStrength) + Optional(delay) +
                         delimitedList(gateInstance) + SEMI)

        udpInstance = Group(Group(identifier + Optional(range | subscrRef)) +
                            LPAR + Group(delimitedList(expr)) + RPAR)
        udpInstantiation = Group(identifier - Optional(driveStrength) + Optional(delay) +
                                 delimitedList(udpInstance) + SEMI).setName("udpInstantiation")

        parameterValueAssignment = Group(Literal("#") + LPAR + Group(delimitedList(expr)) + RPAR)
        namedPortConnection = Group(DOT + identifier + LPAR + expr + RPAR).setName("namedPortConnection")  # .setDebug()
        # sanity check (ParserElement == str runs a match)
        assert(r'.\abc (abc )' == namedPortConnection)
        modulePortConnection = expr | empty

        inst_args = Group(LPAR + (delimitedList(namedPortConnection) |
                                  delimitedList(modulePortConnection)) + RPAR).setName("inst_args")
        moduleInstance = Group(Group(identifier + Optional(range)) + inst_args).setName("moduleInstance")  # .setDebug()

        moduleInstantiation = Group(identifier + Optional(parameterValueAssignment) +
                                    delimitedList(moduleInstance).setName("moduleInstanceList") +
                                    SEMI).setName("moduleInstantiation")

        parameterOverride = Group("defparam" + delimitedList(paramAssgnmt) + SEMI)
        task = Group("task" + identifier + SEMI +
                     ZeroOrMore(tfDecl) +
                     stmtOrNull +
                     "endtask")

        specparamDecl = Group("specparam" + delimitedList(paramAssgnmt) + SEMI)

        pathDescr1 = Group(LPAR + subscrIdentifier + "=>" + subscrIdentifier + RPAR)
        pathDescr2 = Group(LPAR + Group(delimitedList(subscrIdentifier)) + "*>" +
                           Group(delimitedList(subscrIdentifier)) + RPAR)
        pathDescr3 = Group(LPAR + Group(delimitedList(subscrIdentifier)) + "=>" +
                           Group(delimitedList(subscrIdentifier)) + RPAR)
        pathDelayValue = Group((LPAR + Group(delimitedList(mintypmaxExpr | expr)) + RPAR) |
                               mintypmaxExpr |
                               expr)
        pathDecl = Group((pathDescr1 | pathDescr2 | pathDescr3) + EQ +
                         pathDelayValue + SEMI).setName("pathDecl")

        portConditionExpr = Forward()
        portConditionTerm = Optional(unop) + subscrIdentifier
        portConditionExpr << portConditionTerm + Optional(binop + portConditionExpr)
        polarityOp = oneOf("+ -")
        levelSensitivePathDecl1 = Group(
            if_ + Group(LPAR + portConditionExpr + RPAR) +
            subscrIdentifier + Optional(polarityOp) + "=>" + subscrIdentifier + EQ +
            pathDelayValue + SEMI)
        levelSensitivePathDecl2 = Group(
            if_ + Group(LPAR + portConditionExpr + RPAR) +
            LPAR + Group(delimitedList(subscrIdentifier)) + Optional(polarityOp) + "*>" +
            Group(delimitedList(subscrIdentifier)) + RPAR + EQ + pathDelayValue + SEMI)
        levelSensitivePathDecl = levelSensitivePathDecl1 | levelSensitivePathDecl2

        edgeIdentifier = posedge | negedge
        edgeSensitivePathDecl1 = Group(
            Optional(if_ + Group(LPAR + expr + RPAR)) +
            LPAR + Optional(edgeIdentifier) +
            subscrIdentifier + "=>" +
            LPAR + subscrIdentifier + Optional(polarityOp) + COLON + expr + RPAR + RPAR +
            EQ + pathDelayValue + SEMI)
        edgeSensitivePathDecl2 = Group(
            Optional(if_ + Group(LPAR + expr + RPAR)) +
            LPAR + Optional(edgeIdentifier) +
            subscrIdentifier + "*>" +
            LPAR + delimitedList(subscrIdentifier) + Optional(polarityOp) + COLON +
            expr + RPAR + RPAR +
            EQ + pathDelayValue + SEMI)
        edgeSensitivePathDecl = edgeSensitivePathDecl1 | edgeSensitivePathDecl2

        edgeDescr = oneOf("01 10 0x x1 1x x0").setName("edgeDescr")

        timCheckEventControl = Group(posedge | negedge |
                                     (edge + LBRACK + delimitedList(edgeDescr) + RBRACK))
        timCheckCond = Forward()
        timCondBinop = oneOf("== === != !==")
        timCheckCondTerm = (expr + timCondBinop + scalarConst) | (Optional("~") + expr)
        timCheckCond << ((LPAR + timCheckCond + RPAR) | timCheckCondTerm)
        timCheckEvent = Group(Optional(timCheckEventControl) +
                              subscrIdentifier +
                              Optional("&&&" + timCheckCond))
        timCheckLimit = expr
        controlledTimingCheckEvent = Group(timCheckEventControl + subscrIdentifier +
                                           Optional("&&&" + timCheckCond))
        notifyRegister = identifier

        systemTimingCheck1 = Group("$setup" + LPAR + timCheckEvent + COMMA + timCheckEvent +
                                   COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR + SEMI)
        systemTimingCheck2 = Group("$hold" + LPAR + timCheckEvent + COMMA + timCheckEvent +
                                   COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR + SEMI)
        systemTimingCheck3 = Group("$period" + LPAR + controlledTimingCheckEvent + COMMA +
                                   timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR + SEMI)
        systemTimingCheck4 = Group("$width" + LPAR + controlledTimingCheckEvent + COMMA +
                                   timCheckLimit +
                                   Optional(COMMA + expr + COMMA + notifyRegister) + RPAR + SEMI)
        systemTimingCheck5 = Group("$skew" + LPAR + timCheckEvent + COMMA + timCheckEvent +
                                   COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR + SEMI)
        systemTimingCheck6 = Group("$recovery" + LPAR + controlledTimingCheckEvent + COMMA +
                                   timCheckEvent + COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR + SEMI)
        systemTimingCheck7 = Group("$setuphold" + LPAR + timCheckEvent + COMMA + timCheckEvent +
                                   COMMA + timCheckLimit + COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR + SEMI)
        systemTimingCheck = (FollowedBy('$') + (systemTimingCheck1 | systemTimingCheck2 |
                                                systemTimingCheck3 | systemTimingCheck4 |
                                                systemTimingCheck5 | systemTimingCheck6 |
                                                systemTimingCheck7)).setName("systemTimingCheck")
        sdpd = if_ + Group(LPAR + expr + RPAR) + \
            (pathDescr1 | pathDescr2) + EQ + pathDelayValue + SEMI

        specifyItem = ~Keyword("endspecify") + (specparamDecl | pathDecl |
                                               levelSensitivePathDecl |
                                               edgeSensitivePathDecl |
                                               systemTimingCheck | sdpd)
        specifyBlock = Group("specify" + ZeroOrMore(specifyItem) + "endspecify").setName("specifyBlock")

        moduleItem = ~Keyword("endmodule") + (
            parameterDecl | inputDecl | outputDecl | inoutDecl | regDecl |
            netDecl3 | netDecl1 | netDecl2 |
            timeDecl | integerDecl | realDecl | eventDecl |
            gateDecl | parameterOverride | continuousAssign |
            specifyBlock | initialStmt | alwaysStmt | task | functionDecl |
            # these have to be at the end - they start with identifiers
            moduleInstantiation | udpInstantiation)

        portRef = subscrIdentifier
        portExpr = portRef | Group(LBRACE + delimitedList(portRef) + RBRACE)
        port = portExpr | Group((DOT + identifier + LPAR + portExpr + RPAR))

        moduleHdr = Group(oneOf("module macromodule") + identifier +
                          Optional(LPAR + Group(Optional(delimitedList(
                              Group(oneOf("input output") +
                                    (netDecl1Arg | netDecl2Arg | netDecl3Arg)) |
                              port))) + RPAR) + SEMI).setName("moduleHdr")

        module = Group(moduleHdr +
                       Group(ZeroOrMore(moduleItem)) +
                       "endmodule").setName("module")  # .setDebug()

        udpDecl = outputDecl | inputDecl | regDecl
        udpInitVal = (Regex("1'[bB][01xX]") | Regex("[01xX]")).setName("udpInitVal")
        udpInitialStmt = Group("initial" + identifier + EQ + udpInitVal + SEMI).setName("udpInitialStmt")
        levelSymbol = oneOf("0 1 x X ? b B")
        levelInputList = Group(OneOrMore(levelSymbol).setName("levelInpList"))
        outputSymbol = oneOf("0 1 x X")
        combEntry = Group(levelInputList + COLON + outputSymbol + SEMI)
        edgeSymbol = oneOf("r R f F p P n N *")
        edge = Group(LPAR + levelSymbol + levelSymbol + RPAR) | \
            Group(edgeSymbol)
        edgeInputList = Group(ZeroOrMore(levelSymbol) + edge + ZeroOrMore(levelSymbol))
        inputList = levelInputList | edgeInputList
        seqEntry = Group(inputList + COLON + levelSymbol + COLON +
                         (outputSymbol | "-") + SEMI).setName("seqEntry")
        udpTableDefn = Group("table" + OneOrMore(combEntry | seqEntry) + "endtable").setName("table")

        udp = Group("primitive" + identifier +
                    LPAR + Group(delimitedList(identifier)) + RPAR + SEMI +
                    OneOrMore(udpDecl) +
                    Optional(udpInitialStmt) +
                    udpTableDefn +
                    "endprimitive")

        verilogbnf = OneOrMore(module | udp) + StringEnd()

        verilogbnf.ignore(cppStyleComment)
        verilogbnf.ignore(compilerDirective)

    return verilogbnf
""""Common pyparsing grammar patterns.""" from pyparsing import alphas, nums from pyparsing import Group, OneOrMore, Optional, Regex, Suppress, Word import re pos_neg_int_number = Word('+-' + nums).setParseAction( lambda t: [int(t[0])]) # '+3' or '-2' are examples # matching float w/ regex is ugly but is recommended by pyparsing regex_after_decimal = r'([0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)' float_number = Regex(r'[-+]?([0-9]+\.(?!([0-9]|[eE])))|{0}'.format(regex_after_decimal)) \ .setParseAction(lambda t: [float(t[0])]) chemical_formula = Group(OneOrMore(Word(alphas, min=1, max=2) + Optional(float_number, default=1.0))) + \ Optional(Suppress('/') + pos_neg_int_number, default=0) reg_symbol = r'([A-z][A-z]?)' reg_amount = r'([-+]?([0-9]+\.(?!([0-9]|[eE])))|([0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?))?' reg_charge = r'/([+-]?[0-9]+)' chem_regex = reg_symbol + reg_amount def parse_chemical_formula(formula): """""" matches = re.findall(chem_regex, formula) sym_amnts = [(m[0], float(m[1]) if m[1] != '' else 1.0) for m in matches] charge = re.search(reg_charge, formula) if charge is None: charge = 0 else: charge = int(charge.groups()[0])
val1 = val1 and val2 if not val1: return val1 return val1 word_characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_0123456789' expr = Forward() # define the parser integer = Word(nums) real = Combine(Word(nums) + "." + Word(nums)) constant = oneOf('True False None yes no') + WordEnd(word_characters) variable = Regex(r'([a-zA-Z0-9\._]+)') explicit_variable = '$' + Regex(r'([a-zA-Z0-9\._]+)') current_scope = Literal('$$') string = (QuotedString("'''", escChar=None, unquoteResults=True) | QuotedString('"""', escChar=None, unquoteResults=True) | QuotedString('"', escChar="\\", unquoteResults=True) | QuotedString('\'', escChar="\\", unquoteResults=True)) regexp = QuotedString('/', escChar=None) timespan = Combine(Word(nums) + oneOf('ms s m h d')) current_scope_operand = current_scope variable_operand = variable explicit_variable_operand = explicit_variable integer_operand = integer real_operand = real
else: res += [b, rdflib.RDF.first, x] res += [b, rdflib.RDF.rest, rdflib.RDF.nil] res += other if DEBUG: print("CollectionOut", res) return [res] # SPARQL Grammar from http://www.w3.org/TR/sparql11-query/#grammar # ------ TERMINALS -------------- # [139] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20])* '>' IRIREF = Combine( Suppress('<') + Regex(r'[^<>"{}|^`\\%s]*' % ''.join('\\x%02X' % i for i in range(33))) + Suppress('>')) IRIREF.setParseAction(lambda x: rdflib.URIRef(x[0])) # [164] P_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] if sys.maxunicode == 0xffff: # this is narrow python build (default on windows/osx) # this means that unicode code points over 0xffff are stored # as several characters, which in turn means that regex character # ranges with these characters do not work. # See # * http://bugs.python.org/issue12729 # * http://bugs.python.org/issue12749 # * http://bugs.python.org/issue3665 #
def define_dot_parser(self):
    """Define dot grammar.

    Based on the grammar http://www.graphviz.org/doc/info/lang.html

    Returns the top-level ``graphparser`` element with all parse
    actions (``self._proc_*``) attached.
    """
    # punctuation
    colon = Literal(":")
    lbrace = Suppress("{")
    rbrace = Suppress("}")
    lbrack = Suppress("[")
    rbrack = Suppress("]")
    lparen = Literal("(")
    rparen = Literal(")")
    equals = Suppress("=")
    comma = Literal(",")
    dot = Literal(".")
    slash = Literal("/")
    bslash = Literal("\\")
    star = Literal("*")
    semi = Suppress(";")
    at = Literal("@")
    minus = Literal("-")
    pluss = Suppress("+")

    # keywords
    strict_ = CaselessLiteral("strict")
    graph_ = CaselessLiteral("graph")
    digraph_ = CaselessLiteral("digraph")
    subgraph_ = CaselessLiteral("subgraph")
    node_ = CaselessLiteral("node")
    edge_ = CaselessLiteral("edge")

    # every punctuation char except '_' plus whitespace terminates a bare word
    punctuation_ = "".join([c for c in string.punctuation if c not in '_']) + string.whitespace

    # token definitions
    identifier = Word(alphanums + "_").setName("identifier")

    # double_quoted_string = QuotedString('"', multiline=True,escChar='\\',
    #    unquoteResults=True) # dblQuotedString
    double_quoted_string = Regex(r'\"(?:\\\"|\\\\|[^"])*\"', re.MULTILINE)
    double_quoted_string.setParseAction(removeQuotes)
    # dot allows "a" + "b" style concatenation of quoted strings
    quoted_string = Combine(
        double_quoted_string +
        Optional(OneOrMore(pluss + double_quoted_string)),
        adjacent=False)
    alphastring_ = OneOrMore(CharsNotIn(punctuation_))

    def parse_html(s, loc, toks):
        # Re-wrap the nested content as a dot HTML label.
        return '<<%s>>' % ''.join(toks[0])

    opener = '<'
    closer = '>'
    try:
        html_text = pyparsing.nestedExpr(
            opener, closer,
            ((CharsNotIn(opener + closer).setParseAction(lambda t: t[0])))
        ).setParseAction(parse_html)
    # FIX: was a bare `except:`, which also swallows SystemExit and
    # KeyboardInterrupt; narrow to Exception while keeping the fallback
    # for old pyparsing versions without nestedExpr.
    except Exception:
        log.debug('nestedExpr not available.')
        log.warning('Old version of pyparsing detected. Version 1.4.8 or '
                    'later is recommended. Parsing of html labels may not '
                    'work properly.')
        html_text = Combine(Literal("<<") + OneOrMore(CharsNotIn(",]")))

    ID = (alphastring_ | html_text |
          quoted_string |  # .setParseAction(strip_quotes) |
          identifier).setName("ID")

    float_number = Combine(Optional(minus) +
                           OneOrMore(Word(nums + "."))).setName("float_number")

    righthand_id = (float_number | ID).setName("righthand_id")

    port_angle = (at + ID).setName("port_angle")

    port_location = ((OneOrMore(Group(colon + ID)) |
                      Group(colon + lparen + ID + comma + ID + rparen))).setName("port_location")

    port = Combine((Group(port_location + Optional(port_angle)) |
                    Group(port_angle + Optional(port_location)))).setName("port")

    node_id = (ID + Optional(port))
    a_list = OneOrMore(ID + Optional(equals + righthand_id) +
                       Optional(comma.suppress())).setName("a_list")

    attr_list = OneOrMore(lbrack + Optional(a_list) + rbrack).setName(
        "attr_list").setResultsName('attrlist')

    attr_stmt = ((graph_ | node_ | edge_) + attr_list).setName("attr_stmt")

    edgeop = (Literal("--") | Literal("->")).setName("edgeop")

    stmt_list = Forward()
    graph_stmt = (lbrace + Optional(stmt_list) +
                  rbrace + Optional(semi)).setName("graph_stmt")

    edge_point = Forward()

    edgeRHS = OneOrMore(edgeop + edge_point)
    edge_stmt = edge_point + edgeRHS + Optional(attr_list)

    subgraph = (Optional(subgraph_, '') + Optional(ID, '') +
                Group(graph_stmt)).setName("subgraph").setResultsName('ssubgraph')

    edge_point <<= (subgraph | graph_stmt | node_id)

    node_stmt = (node_id + Optional(attr_list) +
                 Optional(semi)).setName("node_stmt")

    assignment = (ID + equals + righthand_id).setName("assignment")
    # Order matters: assignment before node_stmt so 'a = b' is not a node.
    stmt = (assignment | edge_stmt | attr_stmt |
            subgraph | graph_stmt | node_stmt).setName("stmt")
    stmt_list <<= OneOrMore(stmt + Optional(semi))

    graphparser = ((Optional(strict_, 'notstrict') +
                    ((graph_ | digraph_)) +
                    Optional(ID, '') +
                    lbrace + Group(Optional(stmt_list)) +
                    rbrace).setResultsName("graph"))

    singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine)

    # actions
    graphparser.ignore(singleLineComment)
    graphparser.ignore(cStyleComment)
    node_id.setParseAction(self._proc_node_id)
    assignment.setParseAction(self._proc_attr_assignment)
    a_list.setParseAction(self._proc_attr_list)
    edge_stmt.setParseAction(self._proc_edge_stmt)
    node_stmt.setParseAction(self._proc_node_stmt)
    attr_stmt.setParseAction(self._proc_default_attr_stmt)
    attr_list.setParseAction(self._proc_attr_list_combine)
    subgraph.setParseAction(self._proc_subgraph_stmt)
    # graph_stmt.setParseAction(self._proc_graph_stmt)
    graphparser.setParseAction(self._main_graph_stmt)
    return graphparser
def __eq__(self, other): return self.name == other.name # Character literals LCURLY, RCURLY, LPAREN, RPAREN, QUOTE, COMMA, AT, EQUALS, HASH = map( Suppress, '{}()",@=#') def bracketed(expr): """ Return matcher for `expr` between curly brackets or parentheses """ return (LPAREN + expr + RPAREN) | (LCURLY + expr + RCURLY) # Define parser components for strings (the hard bit) chars_no_curly = Regex(r"[^{}]+") chars_no_curly.leaveWhitespace() chars_no_quotecurly = Regex(r'[^"{}]+') chars_no_quotecurly.leaveWhitespace() # Curly string is some stuff without curlies, or nested curly sequences curly_string = Forward() curly_item = Group(curly_string) | chars_no_curly curly_string << LCURLY + ZeroOrMore(curly_item) + RCURLY # quoted string is either just stuff within quotes, or stuff within quotes, within # which there is nested curliness quoted_item = Group(curly_string) | chars_no_quotecurly quoted_string = QUOTE + ZeroOrMore(quoted_item) + QUOTE # Numbers can just be numbers. Only integers though. number = Regex("[0-9]+")
# Punctuation tokens for a PEP 508-style requirement grammar; all suppressed.
# NOTE(review): `L`, `ALPHANUM`, `REGEX` and `LEGACY_REGEX` are defined
# outside this chunk (`L` is presumably an alias for pyparsing.Literal, and
# the two REGEX constants look like PEP 440 version patterns) -- confirm.
LBRACKET = L("[").suppress()
RBRACKET = L("]").suppress()
LPAREN = L("(").suppress()
RPAREN = L(")").suppress()
COMMA = L(",").suppress()
SEMICOLON = L(";").suppress()
AT = L("@").suppress()

# Project/extra names: alphanumerics optionally joined by runs of "-", "_"
# or "."; separators may repeat, but each segment must end alphanumeric.
PUNCTUATION = Word("-_.")
IDENTIFIER_END = ALPHANUM | (ZeroOrMore(PUNCTUATION) + ALPHANUM)
IDENTIFIER = Combine(ALPHANUM + ZeroOrMore(IDENTIFIER_END))

NAME = IDENTIFIER("name")
EXTRA = IDENTIFIER

# Direct reference: "@ <uri>", the URI being everything up to a space.
URI = Regex(r"[^ ]+")("url")
URL = AT + URI

# Extras: "[a,b,...]" -- the bracketed list may be empty.
EXTRAS_LIST = EXTRA + ZeroOrMore(COMMA + EXTRA)
EXTRAS = (LBRACKET + Optional(EXTRAS_LIST) + RBRACKET)("extras")

VERSION_PEP440 = Regex(REGEX, re.VERBOSE | re.IGNORECASE)
VERSION_LEGACY = Regex(LEGACY_REGEX, re.VERBOSE | re.IGNORECASE)

# "^" = longest-match alternation: prefer whichever version form matches more.
VERSION_ONE = VERSION_PEP440 ^ VERSION_LEGACY
# One or more comma-separated specifiers, re-joined with "," into one token.
VERSION_MANY = Combine(VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE),
                       joinString=",", adjacent=False)("_raw_spec")
# Specifier set, optionally parenthesized; normalizes "no spec" to "".
_VERSION_SPEC = Optional(((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY))
_VERSION_SPEC.setParseAction(lambda s, l, t: t._raw_spec or "")
# Lucene-style query-string grammar fragment.
# NOTE(review): `DOT`, `LBRACK`, `LBRACE`, `RBRACK` and `RBRACE` are
# defined outside this chunk.
TO = Keyword('TO')
query_expr = Forward()
required_modifier = Literal('+')('required')
prohibit_modifier = Literal('-')('prohibit')
# Characters that terminate a bare word in the query syntax.
special_characters = '=><(){}[]^"~*?:\\/.&|'
valid_word = Word(printables, excludeChars=special_characters).setName('word')
# Unescape: temporarily park "\\" as chr(127) so single backslashes can be
# stripped, then restore the parked ones as literal "\".
valid_word.setParseAction(lambda t: t[0].replace('\\\\', chr(127)).replace(
    '\\', '').replace(chr(127), '\\'))
clause = Forward()
# "attr.fieldname" -- the attribute prefix is optional.
field_name = (Optional(valid_word()('attr') + DOT)) + valid_word()('fieldname')
single_term = valid_word()('singleterm')
phrase = QuotedString('"', unquoteResults=True)('phrase')
# Wildcard terms; parse action rewrites Lucene ?/* into regex .?/.* form.
wildcard = Regex(r'[a-z0-9]*[\?\*][a-z0-9]*')('wildcard')
wildcard.setParseAction(lambda t: t[0].replace('?', '.?').replace('*', '.*'))
regex = QuotedString('/', unquoteResults=True)('regex')
_all = Literal('*')
# Range bounds: "[" / "{" select inclusive vs exclusive endpoints.
lower_range = Group((LBRACK('inclusive') | LBRACE('exclusive')) +
                    (valid_word | _all)('lowerbound'))
# NOTE(review): 'esclusive' below looks like a typo for 'exclusive' (compare
# lower_range); consumers checking the 'exclusive' results name on the upper
# bound would never see it -- confirm before renaming, it is a public name.
upper_range = Group((valid_word | _all)('upperbound') +
                    (RBRACK('inclusive') | RBRACE('esclusive')))
_range = (lower_range + TO + upper_range)('range')
GT = Literal('>')
GTE = Literal('>=')
LT = Literal('<')
LTE = Literal('<=')
# NOTE(review): the statement below is truncated in this chunk; its
# continuation is not visible here.
one_sided_range = Group((GTE | GT | LTE | LT)('op') +
# NOTE(review): the line below is the tail of a truncated
# `from pyparsing import (...)` statement; its opening is not in this chunk.
opAssoc, ParseException, ParserElement)

# Packrat memoization: large speedup for grammars with heavy backtracking.
ParserElement.enablePackrat()

# Splits "name(object:..." into the operator head and the rest.
re_operator = re.compile(r'''^(?P<operator>.*?\()(?P<rest>object:.*)''')

COLON, LBRACK, RBRACK, LBRACE, RBRACE, TILDE, CARAT = map(Literal, ":[]{}~^")
LPAR, RPAR = map(Suppress, "()")
and_ = CaselessKeyword("AND")
or_ = CaselessKeyword("OR")
not_ = CaselessKeyword("NOT")
to_ = CaselessKeyword("TO")
keyword = and_ | or_ | not_

expression = Forward()

# A word is runs of plain chars or backslash-escaped specials.
valid_word = Regex(r'([a-zA-Z0-9*_+.-]|\\[!(){}\[\]^"~*?\\:])+').setName(
    "word")
# Unescape: park "\\" as chr(127), strip single "\", restore parked ones.
valid_word.setParseAction(lambda t: t[0].replace('\\\\', chr(127)).replace(
    '\\', '').replace(chr(127), '\\'))

# NOTE(review): shadows the builtin `string` module name in this scope.
string = QuotedString('"')

required_modifier = Literal("+")("required")
prohibit_modifier = Literal("-")("prohibit")
integer = Regex(r"\d+").setParseAction(lambda t: int(t[0]))
# "~N" proximity for phrases.
proximity_modifier = Group(TILDE + integer("proximity"))
number = Regex(r'\d+(\.\d+)?').setParseAction(lambda t: float(t[0]))
# "~" fuzziness; defaults to 0.5 when no number follows.
fuzzy_modifier = TILDE + Optional(number, default=0.5)("fuzzy")

term = Forward()
field_name = valid_word.copy().setName("fieldname")
# NOTE(review): the statement below is truncated in this chunk; its
# continuation is not visible here.
incl_range_search = Group(LBRACK + term("lower") + to_ + term("upper") +
# Grammar for EPICS autosave/.req-style files.
# NOTE(review): `minus`, `point`, `file_name`, `ignored_quote` and
# `ignored_comma` are defined outside this chunk.
number = Word(nums)
integer = Combine(Optional(minus) + number)
float_number = Combine(integer +
                       Optional(point + Optional(number))).setParseAction(
    lambda t: float(t[0]))

# (originally I had pyparsing pulling out the $(Macro) references from inside
# names as well, but the framework doesn't work especially well without
# whitespace delimiters between tokens so we just do simple find/replace in a
# second pass)
pv_name = Word(alphanums + ":._$()")
pv_value = (float_number | Word(alphanums))
pv_assignment = pv_name + pv_value

# "#" to end of line.
comment = Literal("#") + Regex(r".*")

# Macro substitutions: NAME=value, separated by ";" or ",".
macro = Group(Word(alphas) + Literal("=").suppress() + pv_name)
macros = Optional(macro + ZeroOrMore(Word(";,").suppress() + macro))

#file_include = Literal("file") + pv_name + macros
# Include directive: `file name` or `file "name"`, optional comma, macros.
file_include = Literal("file") + \
    (file_name | ignored_quote + file_name + ignored_quote) \
    + Optional(ignored_comma) + macros

def line(contents):
    # Anchor `contents` to a whole line; the line end itself is dropped.
    return LineStart() + ZeroOrMore(Group(contents)) + LineEnd().suppress()

req_line = line(file_include | comment.suppress() | pv_name)
def command_log(*args):
    """Print previous accesses found in the tail of the (sys)log.

    args: optional [ip] [time period in seconds]. Python 2 code
    (print statements, string.uppercase).

    NOTE(review): reads the globals `logfile_path`, `EXIT_FAILURE` and
    `EXIT_SUCCESS`, which are defined outside this chunk.
    """
    if len(args) > 2:
        raise Exception(
            "Too many arguments: [ip] [time period in s] (optional parameter)")
    ip = ''
    max_ago = float('inf')
    if len(args) >= 1:
        ip = args[0]
    if len(args) == 2:
        max_ago = float(args[1])

    from pyparsing import Word, alphas, Suppress, Combine, nums, string, Optional, Regex, ParseException

    # define line in (sys)log: "Mon DD HH:MM:SS host daemon[pid]: message"
    month = Word(string.uppercase, string.lowercase, exact=3)
    integer = Word(nums)
    serverDateTime = Combine(month + " " + integer + " " + integer + ":" +
                             integer + ":" + integer)
    hostname = Word(alphas + nums + "_" + "-")
    daemon = Word(alphas + nums + "/" + "-" + "_") + Optional(
        Suppress("[") + integer + Suppress("]")) + Suppress(":")
    message = Regex(".*")
    bnf = serverDateTime + hostname + daemon + message

    from collections import deque
    import re, time

    last_access = {}
    # Only the last 100 log lines are inspected.
    tail_n = 100
    for line in deque(open(logfile_path), tail_n):
        try:
            fields = bnf.parseString(line)
        except ParseException:
            continue
        else:
            # NOTE(review): the dots between octets are unescaped, so "."
            # matches any character, not only a literal dot -- confirm.
            m = re.search('requests (\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3})',
                          fields[-1])
            if m:
                #print fields[0], m.group(1)
                cur = time.localtime()
                # guess year... (syslog timestamps carry no year)
                st = time.strptime(fields[0] + " %s" % cur.tm_year,
                                   "%b %d %H:%M:%S %Y")
                if st > cur:
                    # ok, re-guess: must have been last year
                    st = time.strptime(fields[0] + " %s" % (cur.tm_year - 1),
                                       "%b %d %H:%M:%S %Y")
                    if (st > cur):
                        raise Exception("HMF logfile seems too old!?!")
                last_access[m.group(1)] = st

    # NOTE(review): `cur` is only bound inside the loop when a line matched
    # the IP regex; with zero matches the expressions below raise NameError.
    ips = [key for key in last_access.keys() if ip in key]
    access_in_period = [((time.mktime(cur) - time.mktime(t)) <= max_ago)
                        for r, t in last_access.items()]
    if ips and any(access_in_period):
        print "Previous accesses:"
        for (resource, timestamp), state in zip(last_access.items(),
                                                access_in_period):
            if not state:
                continue
            if not resource in ips:
                continue
            print "\t%s was accessed on %s (%.1fs ago)" % (
                resource, time.asctime(timestamp),
                time.mktime(cur) - time.mktime(timestamp))
        return EXIT_FAILURE
    return EXIT_SUCCESS
def property_grammar():
    """Build the pyparsing grammar for hwdb NAME=VALUE property lines:
    the fixed-property table plus KEYBOARD_KEY_* and EVDEV_ABS_* entries.

    NOTE(review): INTEGER, REAL, SIGNED_REAL and EOL are grammar fragments
    defined outside this chunk.
    """
    ParserElement.setDefaultWhitespaceChars(' ')

    # "[*]<dpi>@<hz>" entries; the optional '*' marks the default setting.
    dpi_setting = Group(
        Optional('*')('DEFAULT') + INTEGER('DPI') + Suppress('@') +
        INTEGER('HZ'))('SETTINGS*')
    # 3x3 mount matrix: three ';'-separated rows of three ','-separated reals.
    mount_matrix_row = SIGNED_REAL + ',' + SIGNED_REAL + ',' + SIGNED_REAL
    mount_matrix = Group(mount_matrix_row + ';' + mount_matrix_row + ';' +
                         mount_matrix_row)('MOUNT_MATRIX')
    xkb_setting = Optional(Word(alphanums + '+-/@._'))
    # Although this set doesn't cover all of characters in database entries,
    # it's enough for test targets.
    name_literal = Word(printables + ' ')

    # (property name, value sub-grammar) table for the fixed properties.
    props = (
        ('MOUSE_DPI', Group(OneOrMore(dpi_setting))),
        ('MOUSE_WHEEL_CLICK_ANGLE', INTEGER),
        ('MOUSE_WHEEL_CLICK_ANGLE_HORIZONTAL', INTEGER),
        ('MOUSE_WHEEL_CLICK_COUNT', INTEGER),
        ('MOUSE_WHEEL_CLICK_COUNT_HORIZONTAL', INTEGER),
        ('ID_AUTOSUSPEND', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_ACCELEROMETER', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_JOYSTICK', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_KEY', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_KEYBOARD', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_MOUSE', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_POINTINGSTICK', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_SWITCH', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_TABLET', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_TABLET_PAD', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_TOUCHPAD', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_TOUCHSCREEN', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_TRACKBALL', Or((Literal('0'), Literal('1')))),
        ('POINTINGSTICK_SENSITIVITY', INTEGER),
        ('POINTINGSTICK_CONST_ACCEL', REAL),
        ('ID_INPUT_JOYSTICK_INTEGRATION', Or(('internal', 'external'))),
        ('ID_INPUT_TOUCHPAD_INTEGRATION', Or(('internal', 'external'))),
        ('XKB_FIXED_LAYOUT', xkb_setting),
        ('XKB_FIXED_VARIANT', xkb_setting),
        ('XKB_FIXED_MODEL', xkb_setting),
        ('KEYBOARD_LED_NUMLOCK', Literal('0')),
        ('KEYBOARD_LED_CAPSLOCK', Literal('0')),
        ('ACCEL_MOUNT_MATRIX', mount_matrix),
        ('ACCEL_LOCATION', Or(('display', 'base'))),
        ('PROXIMITY_NEAR_LEVEL', INTEGER),
        ('IEEE1394_UNIT_FUNCTION_MIDI', Or((Literal('0'), Literal('1')))),
        ('IEEE1394_UNIT_FUNCTION_AUDIO', Or((Literal('0'), Literal('1')))),
        ('IEEE1394_UNIT_FUNCTION_VIDEO', Or((Literal('0'), Literal('1')))),
        ('ID_VENDOR_FROM_DATABASE', name_literal),
        ('ID_MODEL_FROM_DATABASE', name_literal),
    )
    # "-" (And with error stop): once NAME= matched, the value may not
    # backtrack, so a malformed value reports an error instead of skipping.
    fixed_props = [Literal(name)('NAME') - Suppress('=') - val('VALUE')
                   for name, val in props]
    kbd_props = [Regex(r'KEYBOARD_KEY_[0-9a-f]+')('NAME')
                 - Suppress('=')
                 - ('!' ^ (Optional('!') - Word(alphanums + '_')))('VALUE')
                 ]
    abs_props = [Regex(r'EVDEV_ABS_[0-9a-f]{2}')('NAME')
                 - Suppress('=')
                 - Word(nums + ':')('VALUE')
                 ]

    grammar = Or(fixed_props + kbd_props + abs_props) + EOL
    return grammar
def _get_grammar():
    # type: () -> Dict[str, Any]
    '''
    Create parser grammar dictionary.

    Each entry maps a field name to the pyparsing fragment that parses it;
    every fragment reports failures through
    AssetNameParser._raise_field_error so errors name the offending field.

    Returns:
        dict: Grammar.
    '''
    # --- field tokens -----------------------------------------------------
    project = Regex(r'[a-z]{3,4}\d{1,3}')\
        .setResultsName('project')\
        .setFailAction(AssetNameParser._raise_field_error('project', 'token'))
    specification = Regex(r'[a-z]{3,4}\d\d\d')\
        .setResultsName('specification')\
        .setFailAction(AssetNameParser._raise_field_error('specification', 'token'))
    descriptor = Regex(r'[a-z0-9][a-z0-9-]*')\
        .setResultsName('descriptor')\
        .setFailAction(AssetNameParser._raise_field_error('descriptor', 'token'))
    # Zero-padded numeric fields are converted to int by parse actions.
    version = Regex(r'\d{' + str(AssetNameParser.VERSION_PADDING) + '}')\
        .setParseAction(lambda s, l, t: int(t[0]))\
        .setResultsName('version')\
        .setFailAction(AssetNameParser._raise_field_error('version', 'token'))
    coord = Regex(r'\d{' + str(AssetNameParser.COORDINATE_PADDING) + '}')\
        .setParseAction(lambda s, l, t: int(t[0]))
    t_sep = Suppress(AssetNameParser.TOKEN_SEPARATOR)
    # Coordinate: one to three padded integers joined by the token separator.
    coordinate = Group(coord + Optional(t_sep + coord) + Optional(t_sep + coord))\
        .setResultsName('coordinate')\
        .setFailAction(AssetNameParser._raise_field_error('coordinate', 'token'))
    frame = Regex(r'\d{' + str(AssetNameParser.FRAME_PADDING) + '}')\
        .setParseAction(lambda s, l, t: int(t[0]))\
        .setResultsName('frame')\
        .setFailAction(AssetNameParser._raise_field_error('frame', 'token'))
    # "$" anchors the extension to the end of the parsed string.
    extension = Regex(r'[a-zA-Z0-9]+$')\
        .setResultsName('extension')\
        .setFailAction(AssetNameParser._raise_field_error('extension', 'token'))
    # --- field indicators (suppressed prefixes) ---------------------------
    project_indicator = Suppress(AssetNameParser.PROJECT_INDICATOR)\
        .setFailAction(AssetNameParser._raise_field_error('project', 'indicator'))
    specification_indicator = Suppress(AssetNameParser.SPECIFICATION_INDICATOR)\
        .setFailAction(AssetNameParser._raise_field_error('specification', 'indicator'))
    descriptor_indicator = Suppress(AssetNameParser.DESCRIPTOR_INDICATOR)\
        .setFailAction(AssetNameParser._raise_field_error('descriptor', 'indicator'))
    version_indicator = Suppress(AssetNameParser.VERSION_INDICATOR)\
        .setFailAction(AssetNameParser._raise_field_error('version', 'indicator'))
    coordinate_indicator = Suppress(AssetNameParser.COORDINATE_INDICATOR)\
        .setFailAction(AssetNameParser._raise_field_error('coordinate', 'indicator'))
    frame_indicator = Suppress(AssetNameParser.FRAME_INDICATOR)\
        .setFailAction(AssetNameParser._raise_field_error('frame', 'indicator'))
    extension_indicator = Suppress(AssetNameParser.EXTENSION_INDICATOR)\
        .setFailAction(AssetNameParser._raise_field_error('extension', 'indicator'))
    # ----------------------------------------------------------------------
    grammar = {
        'project': project_indicator + project,
        'specification': specification_indicator + specification,
        'specification_token': specification,
        'descriptor': descriptor_indicator + descriptor,
        'version': version_indicator + version,
        'coordinate': coordinate_indicator + coordinate,
        'frame': frame_indicator + frame,
        'extension': extension_indicator + extension,
        'extension_token': extension,
        'field_separator': Suppress(AssetNameParser.FIELD_SEPARATOR)
    }
    return grammar
# NUMBERS E = CaselessLiteral("E") # binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True) arithSign = Word("+-", exact=1) realNum = Combine( Optional(arithSign) + (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums))) + Optional(E + Optional(arithSign) + Word(nums))).addParseAction(unquote) intNum = Combine( Optional(arithSign) + Word(nums) + Optional(E + Optional("+") + Word(nums))).addParseAction(unquote) # STRINGS, NUMBERS, VARIABLES sqlString = Combine(Regex(r"\'(\'\'|\\.|[^'])*\'")).addParseAction(to_string) identString = Combine(Regex(r'\"(\"\"|\\.|[^"])*\"')).addParseAction(unquote) ident = Combine(~RESERVED + (delimitedList(Literal("*") | Word(alphas + "_", alphanums + "_$") | identString, delim=".", combine=True))).setName("identifier") # EXPRESSIONS expr = Forward() # CASE case = (CASE + Group( ZeroOrMore((WHEN + expr("when") + THEN + expr("then")).addParseAction(to_when_call)))("case") + Optional(ELSE + expr("else")) + END).addParseAction(to_case_call)
from pyparsing import Word, alphas, alphanums, nums, delimitedList, quotedString, Group, ParseException, Literal, Combine, Optional, Regex
from pprint import pprint

# define the grammar

# The objects: "{obj1, obj2, ...}"
OBJECT = Word(alphas, alphanums + '_')
OBJECTS = delimitedList(OBJECT('object'))
OBJLIST = '{' + OBJECTS('objects') + '}'

# The arguments: key=value pairs inside "(...)"
KEY = Word(alphas)
POINT = Literal('.')
PLUSORMINUS = Literal('+') | Literal('-')
NUMBER = Word(nums)
# BUG FIX: the integer pattern was r'[-+]?[0-9]*', whose '*' also matches
# the empty string (or a lone sign), so VALUE could "succeed" on no input
# at all.  Require at least one digit.  FLOAT is still tried first in
# VALUE, so fractional values are unaffected.
INTEGER = Regex(r'[-+]?[0-9]+')
FLOAT = Regex(r'[-+]?[0-9]*\.[0-9]+')
STRING = quotedString()
VALUE = STRING('string') | FLOAT('float') | INTEGER('integer')
KVPAIR = Group(KEY('key') + '=' + VALUE('value'))
KVPAIRS = delimitedList(KVPAIR('kvpair'))
KVLIST = '(' + KVPAIRS('kvpairs') + ')'

# The command: "{objects} name [(k=v, ...)]"
COMNAME = Word(alphas)
COMMAND = OBJLIST('objlist') + COMNAME('comname') + Optional(KVLIST('kvlist'))
COMSPEC = COMMAND('command')


class ServerCommand:
    """Holds a raw command line to be parsed against COMSPEC."""

    def __init__(self, commandLine):
        # The raw, unparsed command text.
        self.commandLine = commandLine
def create_parser(regexes,
                  pattern_matchers,
                  range_fillers,
                  quote_pairs=[('\'', '\''), ('"', '"')],
                  delimiters=[','],
                  require_quotes=False,
                  require_delimiter=False,
                  allow_empty=True):
    """Build a pyparsing parser for delimited lists of codes and code
    ranges, where each code kind is described by a regex, a function that
    expands one matched code into a set, and a function that fills the set
    between two range endpoints.

    NOTE(review): `string_types`, `starmap`, `reduce`, `or_`, `xor` and the
    pyparsing names are imported outside this chunk.  `re._pattern_type` is
    a private alias that no longer exists in modern Python (3.7+ exposes
    `re.Pattern` instead) -- confirm the supported interpreter versions.
    The mutable default arguments (`quote_pairs`, `delimiters`) are never
    mutated here, so they are safe in practice.
    """
    # Accept a single regex/matcher/filler triple as a convenience.
    if isinstance(regexes, string_types) or isinstance(regexes, re._pattern_type):
        regexes = [regexes]
        pattern_matchers = [pattern_matchers]
        range_fillers = [range_fillers]
    assert len(regexes) == len(pattern_matchers)
    assert len(regexes) == len(range_fillers)
    # One Regex element per code kind; its parse action expands the matched
    # text into a frozenset via the paired pattern_matcher.
    code_patterns = list(
        starmap(
            lambda regex, pattern_matcher: Regex(regex).setParseAction(
                lambda s, loc, toks: frozenset(pattern_matcher(toks[0]))),
            zip(regexes, pattern_matchers)))
    # Quoted variants: with require_quotes the bare form is disabled
    # (NoMatch), otherwise bare codes remain acceptable.
    if require_quotes:
        quoted_code_patterns = [NoMatch() for _ in code_patterns]
    else:
        quoted_code_patterns = code_patterns
    # The list(...) forces evaluation inside each loop iteration, so the
    # lambdas see the current opener/closer (no late-binding issue).
    for opener, closer in quote_pairs:
        quoted_code_patterns = list(
            starmap(
                lambda quoted_code_pattern, code_pattern: quoted_code_pattern
                | (Literal(opener).suppress() + code_pattern + Literal(closer).
                   suppress()),
                zip(quoted_code_patterns, code_patterns)))
    # Ranges: "<code>-<code>", expanded to a frozenset by the range_filler.
    code_ranges = map(
        lambda quoted_code_pattern: quoted_code_pattern + Literal('-').
        suppress() + quoted_code_pattern, quoted_code_patterns)
    code_ranges = list(
        starmap(
            lambda code_range, range_filler: code_range.
            setParseAction(lambda s, loc, toks: frozenset(
                range_filler(toks[0], toks[1]))),
            zip(code_ranges, range_fillers)))
    # Fully-quoted ranges: the quotes wrap the whole "<code>-<code>".
    quoted_code_ranges = [NoMatch() for _ in code_ranges]
    for opener, closer in quote_pairs:
        quoted_code_ranges = list(
            starmap(
                lambda quoted_code_range, code_pattern: quoted_code_range |
                (Literal(opener).suppress() + code_pattern + Literal('-').
                 suppress() + code_pattern + Literal(closer).suppress()),
                zip(quoted_code_ranges, code_patterns)))
    quoted_code_ranges = list(
        starmap(
            lambda quoted_code_range, range_filler: quoted_code_range.
            setParseAction(lambda s, loc, toks: frozenset(
                range_filler(toks[0], toks[1]))),
            zip(quoted_code_ranges, range_fillers)))
    any_code_ranges = list(starmap(or_, zip(quoted_code_ranges, code_ranges)))
    # Fold per-kind alternatives into single elements ("^" = longest match).
    quoted_code_pattern = reduce(xor, quoted_code_patterns)
    any_code_range = reduce(xor, any_code_ranges)
    any_delim = reduce(xor, map(Literal, delimiters))
    # allow_empty permits runs of delimiters (i.e. empty list slots).
    if allow_empty:
        any_delim = OneOrMore(any_delim)
    code_list_continuation = any_delim.suppress() + (any_code_range |
                                                     quoted_code_pattern)
    # Without require_delimiter, bare whitespace also separates items.
    if not require_delimiter:
        code_list_continuation |= White().suppress() + (any_code_range |
                                                        quoted_code_pattern)
    # Full list: first item, continuations, optional trailing delimiter,
    # and the input must be fully consumed (StringEnd).
    code_list = (any_code_range | quoted_code_pattern) + ZeroOrMore(
        code_list_continuation) + Optional(
            reduce(or_, map(Literal, delimiters))).suppress() + StringEnd()
    return code_list
# NOTE(review): the elif/return fragment below is the tail of a truncated
# `unquote`-style function whose opening is not in this chunk; it is
# preserved verbatim.
elif val.startswith('`') and val.endswith('`'):
    # MySQL backtick-quoted identifier: unescape `` and re-wrap in '...'.
    val = "'" + val[1:-1].replace("``", "`") + "'"
elif val.startswith("+"):
    val = val[1:]
un = ast.literal_eval(val)
return un

def to_string(instring, tokensStart, retTokens):
    # Convert a matched SQL string token to {"literal": <python str>}:
    # '' becomes an escaped quote, then the text is literal_eval'd.
    val = retTokens[0]
    val = "'" + val[1:-1].replace("''", "\\'") + "'"
    return {"literal": ast.literal_eval(val)}

# NUMBERS
realNum = Regex(r"[+-]?(\d+\.\d*|\.\d+)([eE][+-]?\d+)?").addParseAction(
    unquote)
intNum = Regex(r"[+-]?\d+([eE]\+?\d+)?").addParseAction(unquote)

# STRINGS, NUMBERS, VARIABLES
# Single-quoted string, double-quoted identifier, backtick identifier;
# each allows doubled-quote and backslash escapes inside.
sqlString = Regex(r"\'(\'\'|\\.|[^'])*\'").addParseAction(to_string)
identString = Regex(r'\"(\"\"|\\.|[^"])*\"').addParseAction(unquote)
mysqlidentString = Regex(r'\`(\`\`|\\.|[^`])*\`').addParseAction(unquote)
# Dotted identifier path (a.b.c or a.*), rejecting reserved words up front.
# NOTE(review): RESERVED is defined outside this chunk.
ident = Combine(~RESERVED +
                (delimitedList(Literal("*") |
                               Word(alphas + "_", alphanums + "_$") |
                               identString | mysqlidentString,
                               delim=".", combine=True))).setName("identifier")

# EXPRESSIONS
expr = Forward()
# Grammar for parsing `vmtouch -v`-style output: per-file residency records
# followed by a summary-statistics footer.
from pyparsing import nums as digits
from pyparsing import alphas, LineStart, LineEnd, Word, Literal, Regex, ZeroOrMore, OnlyOnce
from collections import namedtuple

# FIX: all Regex patterns are now raw strings.  "[0-9]+\.[0-9]+" as a plain
# literal contains the invalid escape sequence "\." -- a DeprecationWarning
# that became a SyntaxWarning in Python 3.12 and will eventually be an
# error.  The raw forms pass byte-identical patterns to the regex engine,
# so matching behavior is unchanged.
whole_number = Word(digits)
# "m/n" ratio, e.g. resident/total pages.
whole_number_ratio = whole_number + Literal("/") + whole_number
rational_number = Regex(r"[0-9]+\.[0-9]+")
# Size unit suffix: bytes, kilo, mega, giga, tera.
unit = Regex(r"[BKMGT]")
unit_ratio = whole_number + unit + Literal("/") + whole_number + unit
# A file name is anything up to the end of the line.
file_name = Regex(r"[^\n\r]+")
# TODO IMPROVE THIS: residency picture, e.g. "[Oo  O]" ('O' resident,
# 'o' partly, ' ' not resident); "[Oo ]*" may match an empty picture.
mapped = Regex(r"[Oo ]*")
mapped_pic = Literal("[") + mapped + Literal("]")
fileline = file_name + LineEnd()
mapline = mapped_pic + whole_number_ratio + LineEnd()
# One record = file name line followed by its residency map line.
record = fileline + mapline
# Summary footer lines.
files_stat = Literal("Files:") + whole_number + LineEnd()
dirs_stat = Literal("Directories:") + whole_number + LineEnd()
resident_stat = Literal(
    "Resident Pages:"
) + whole_number_ratio + unit_ratio + rational_number + Literal(
    "%") + LineEnd()
elapsed_stat = Literal("Elapsed:") + rational_number + Literal(
    "seconds") + LineEnd()
all_stats = files_stat + dirs_stat + resident_stat + elapsed_stat
from pyparsing import Word, Literal, Regex, Combine, Optional, White, oneOf, ZeroOrMore
import string
import re


class White(White):
    """ Customize whitespace to match the CSS spec values"""
    # Intentionally shadows pyparsing.White: same element, but the default
    # character set includes form feed (\f) as CSS requires.

    def __init__(self, ws=" \t\r\n\f", min=1, max=0, exact=0):
        super(White, self).__init__(ws, min, max, exact)


# A simple escape: backslash followed by one printable/non-ASCII character.
escaped = (
    Literal("\\").suppress() +
    #chr(20)-chr(126) + chr(128)-unichr(sys.maxunicode)
    Regex(u"[\u0020-\u007e\u0080-\uffff]", re.IGNORECASE))


def convertToUnicode(t):
    # Hex escape digits -> the corresponding unicode character.
    # NOTE(review): `six` is imported outside this chunk (py2/py3 compat).
    return six.unichr(int(t[0], 16))


# Hex escape: backslash + 1-6 hex digits, optionally terminated by exactly
# one whitespace character (which is consumed and dropped, per CSS).
hex_unicode = (
    Literal("\\").suppress() +
    Regex("[0-9a-f]{1,6}", re.IGNORECASE) +
    Optional(White(exact=1)).suppress()).setParseAction(convertToUnicode)


# Try the hex form first so "\41 " is not mis-read as a simple escape of "4".
escape = hex_unicode | escaped

#any unicode literal outside the 0-127 ascii range
nonascii = Regex(u"[^\u0000-\u007f]")