def parse_string(self, string):
    '''Populate a new object from a string.

    Parses a Bacula Director resource block and stores each recognised
    "keyword = value" directive on self.  Parsing is hard, so we're going
    to call out to the pyparsing library here.  I hope you installed it!
    FTR: this is hideous.
    '''
    from pyparsing import Suppress, Regex, quotedString, restOfLine, Keyword, nestedExpr, Group, OneOrMore, Word, Literal, alphanums, removeQuotes, replaceWith, nums, printables
    gr_eq = Literal('=')
    gr_stripped_string = quotedString.copy().setParseAction( removeQuotes )
    gr_opt_quoted_string = gr_stripped_string | restOfLine
    gr_number = Word(nums)
    # yes/no booleans are normalised to '1'/'0'
    gr_yn = Keyword('yes', caseless=True).setParseAction(replaceWith('1')) | \
        Keyword('no', caseless=True).setParseAction(replaceWith('0'))

    def _handle_ip(*x):
        # x[2] is (keyword, '=', nested tokens); re-emit as a config line.
        a, b, c = x[2]
        return ' %s = { %s }' % (a, c[0])

    def _handle_diraddr(*x):
        # Store the whole DirAddresses block as one indented string.
        a, b, c = x[2]
        self._set(DIRADDRESSES, ' %s' % '\n '.join(c))
        return

    def np(words, fn=gr_opt_quoted_string, action=None):
        # Build "<keyword> = <value>", accepting any of *words* as keyword.
        p = Keyword(words[0], caseless=True)
        for w in words[1:]:
            p = p | Keyword(w, caseless=True)
        p = p + gr_eq + fn
        p.setParseAction(action)
        return p

    gr_name = np((NAME,), action=lambda x: self._set_name(x[2]))
    gr_address = np((ADDRESS,), action=self._parse_setter(ADDRESS))
    gr_fd_conn = np(PList('fd connect timeout'), gr_number, self._parse_setter(FD_CONNECT_TIMEOUT, True))
    gr_heart = np(PList('heartbeat interval'), gr_number, self._parse_setter(HEARTBEATINTERVAL, True))
    gr_max_con = np(PList('maximum console connections'), gr_number, self._parse_setter(MAXIMUMCONSOLECONNECTIONS, True))
    gr_max_jobs = np(PList('maximum concurrent jobs'), gr_number, action=self._parse_setter(MAXIMUMCONCURRENTJOBS, True))
    gr_pass = np((PASSWORD,), action=self._parse_setter(PASSWORD))
    gr_pid = np(PList('pid directory'), action=self._parse_setter(PIDDIRECTORY))
    gr_query = np(PList('query file'), action=self._parse_setter(QUERYFILE))
    gr_scripts = np(PList('scripts directory'), action=self._parse_setter(SCRIPTS_DIRECTORY))
    gr_sd_conn = np(PList('sd connect timeout'), gr_number, self._parse_setter(SD_CONNECT_TIMEOUT, True))
    gr_source = np(PList('source address'), action=self._parse_setter(SOURCEADDRESS))
    gr_stats = np(PList('statistics retention'), action=self._parse_setter(STATISTICS_RETENTION))
    gr_verid = np((VERID,), action=self._parse_setter(VERID))
    # BUGFIX: this was wrapped in `lambda x: self._parse_setter(...)`,
    # which built the setter on every match but never invoked it, so
    # "Messages = ..." lines were silently dropped.  Pass the setter
    # directly, matching every other directive here (and the CATALOG
    # directive in the Client parser).
    gr_messages = np((MESSAGES,), action=self._parse_setter(MESSAGE_ID, dereference=True))
    gr_work_dir = np(PList('working directory'), action=self._parse_setter(WORKINGDIRECTORY))
    gr_port = np(PList('dir port'), gr_number, self._parse_setter(PORT, True))
    gr_monitor = np((MONITOR,), gr_yn, action=self._parse_setter(MONITOR))

    # This is a complicated one: DirAddresses is a nested block, e.g.
    #   DirAddresses = { IPv4 = { Addr = 1.2.3.4; Port = 9101 } }
    da_addr = np(('Addr','Port'), Word(printables), lambda x,y,z: ' '.join(z))
    da_ip = np(('IPv4','IPv6','IP'), nestedExpr('{','}', OneOrMore(da_addr).setParseAction(lambda x,y,z: ' ; '.join(z)))).setParseAction(_handle_ip)
    da_addresses = np(PList('dir addresses'), nestedExpr('{','}', OneOrMore(da_ip)), _handle_diraddr)

    gr_res = OneOrMore(gr_name | gr_address | gr_fd_conn | gr_heart |
                       gr_max_con | gr_max_jobs | gr_pass | gr_pid |
                       gr_query | gr_scripts | gr_sd_conn | gr_source |
                       gr_stats | gr_verid | gr_messages | gr_work_dir |
                       gr_port | gr_monitor | da_addresses)
    result = gr_res.parseString(string, parseAll=True)
    return 'Director: ' + self[NAME]
def order_cluster_by_load(self, cluster_list):
    """Return Load records for every host in *cluster_list*, sorted by
    fifteen-minute load average (ascending).

    Load averages are gathered by running ``cat /proc/loadavg`` on each
    host via salt; a sample return looks like
    {'dlceph01.drwg.local': '0.27 0.16 0.15 1/1200 26234'}
    """
    # Grammar matching whitespace-separated decimals such as "0.27".
    dot = Literal('.')
    digit_run = Word(nums)
    decimal = Combine(digit_run + dot + digit_run)
    decimals = OneOrMore(decimal)

    results = self.salt_client.cmd(','.join(cluster_list), 'cmd.run',
                                   ['cat /proc/loadavg'], expr_form='list')
    self.logger.debug("Salt load return: {load}".format(load=results))

    loads = []
    for host, raw_load in results.items():
        parsed = decimals.parseString(raw_load)
        if parsed:
            one_min, five_min, fifteen_min = parsed[0], parsed[1], parsed[2]
            self.logger.debug(
                "Adding Load({host}, {one_min}, {five_min}, {fifteen_min}".format(
                    host=host, one_min=one_min, five_min=five_min,
                    fifteen_min=fifteen_min))
            loads.append(Load(host, one_min, five_min, fifteen_min))
        else:
            self.logger.error("Could not parse host load output")

    # Sort ascending by the fifteen-minute average.
    loads = sorted(loads, key=lambda entry: entry.fifteen_min_load)
    for load in loads:
        self.logger.debug("Sorted load list: " + str(load))
    return loads
def pyparsing_parse(text):
    """Parse ``@Book{identifier, key = value, ...}`` entries from *text*.

    Returns a dict mapping each entry identifier to a {key: value} dict;
    integer values are converted to int, quoted values have internal
    whitespace collapsed.  Parse errors are printed, not raised.
    """
    WHITESPACE = re.compile(r"\s+")
    books = {}
    key_values = {}

    def normalize(tokens):
        # Collapse runs of whitespace inside quoted values.
        return WHITESPACE.sub(" ", tokens[0])

    def add_key_value(tokens):
        key_values[tokens.key] = tokens.value

    def add_book(tokens):
        # Snapshot the accumulated key/value pairs for this entry.
        books[tokens.identifier] = key_values.copy()
        key_values.clear()

    left_brace, right_brace, comma, equals = map(Suppress, "{},=")
    start = Suppress("@Book") + left_brace
    identifier = Regex(r"[a-zA-Z][^,\s]*")("identifier") + comma
    key = Word(alphas, alphanums)("key")
    value = (Word(nums).setParseAction(lambda t: int(t[0]))
             | QuotedString('"', multiline=True).setParseAction(normalize)
             )("value")
    key_value = (key + equals + value).setParseAction(add_key_value)
    bibtex = (start + identifier + delimitedList(key_value)
              + right_brace).setParseAction(add_book)
    try:
        OneOrMore(bibtex).parseString(text)
    except ParseException as err:
        print("parse error: {0}".format(err))
    return books
def parse_string(self, string):
    '''Populate a new object from a string.

    Parses a Storage resource block, storing each recognised
    "keyword = value" directive on self via _parse_setter.  Parsing is
    delegated to the pyparsing library.
    '''
    from pyparsing import Suppress, Regex, quotedString, restOfLine, Keyword, nestedExpr, Group, OneOrMore, Word, Literal, alphanums, removeQuotes, replaceWith, nums
    gr_eq = Literal('=')
    gr_stripped_string = quotedString.copy().setParseAction( removeQuotes )
    gr_opt_quoted_string = gr_stripped_string | restOfLine
    gr_number = Word(nums)
    # yes/no booleans are normalised to '1'/'0'
    gr_yn = Keyword('yes', caseless=True).setParseAction(replaceWith('1')) | \
        Keyword('no', caseless=True).setParseAction(replaceWith('0'))

    def np(words, fn=gr_opt_quoted_string, action=None):
        # Build "<keyword> = <value>", accepting any of *words* as keyword.
        p = Keyword(words[0], caseless=True)
        for w in words[1:]:
            p = p | Keyword(w, caseless=True)
        p = p + gr_eq + fn
        p.setParseAction(action)
        return p

    gr_line = np((NAME,), action=lambda x: self._set_name(x[2]))
    # (keyword spellings, value grammar, storage key) for each directive.
    directives = [
        (PList('sd port'), gr_number, SDPORT),
        ((ADDRESS,), gr_opt_quoted_string, ADDRESS),
        ((PASSWORD,), gr_opt_quoted_string, PASSWORD),
        ((DEVICE,), gr_opt_quoted_string, DEVICE),
        (PList('media type'), gr_opt_quoted_string, MEDIATYPE),
        (PList('auto changer'), gr_yn, AUTOCHANGER),
        (PList('maximum concurrent jobs'), gr_number, MAXIMUMCONCURRENTJOBS),
        (PList('allow compression'), gr_yn, ALLOWCOMPRESSION),
        (PList('heartbeat interval'), gr_opt_quoted_string, HEARTBEATINTERVAL),
    ]
    for words, fn, key in directives:
        gr_line = gr_line | np(words, fn, action=self._parse_setter(key))

    result = OneOrMore(gr_line).parseString(string, parseAll=True)
    return 'Storage: ' + self[NAME]
def parse_ampersand_comment(s):
    """Extract key=value (and key={min,max} range) pairs found anywhere in *s*.

    Returns a list of (key, value) tuples; scalar values that look numeric
    are converted to float, range values to a list of floats.
    NOTE(review): Python 2 only — uses ``string.letters`` and relies on
    ``map()`` returning a list; confirm before porting.
    """
    import pyparsing
    # Packrat memoisation speeds up the backtracking alternation below.
    pyparsing.ParserElement.enablePackrat()
    from pyparsing import Word, Literal, QuotedString, CaselessKeyword, \
        OneOrMore, Group, Optional, Suppress, Regex, Dict
    word = Word(string.letters+string.digits+"%_")
    key = word.setResultsName("key") + Suppress("=")
    # A value is a bare token or a quoted string (either quote style).
    single_value = (Word(string.letters+string.digits+"-.") |
                    QuotedString("'") | QuotedString('"'))
    # A range is "{min,max}" with the braces and comma discarded.
    range_value = Group(Suppress("{") + single_value.setResultsName("min") +
                        Suppress(",") + single_value.setResultsName("max") +
                        Suppress("}"))
    pair = (key + (single_value | range_value).setResultsName("value"))
    g = OneOrMore(pair)
    d = []
    # searchString scans the whole input, collecting every match.
    for x in g.searchString(s):
        v = x.value
        if type(v) == str:
            # Scalar: attempt numeric conversion, else keep the string.
            try:
                v = float(v)
            except ValueError:
                pass
        else:
            # Range: convert both endpoints to float where possible.
            try:
                v = map(float, v.asList())
            except ValueError:
                pass
        d.append((x.key, v))
    return d
def init_parser(self):
    """Build the alignment grammar: a header line (species count and
    sequence length), a first block of named sequences, then optional
    continuation blocks.  Looks like interleaved PHYLIP — TODO confirm
    against callers.  Parse results are delivered through the
    self.set_* callbacks attached below."""
    INTEGER = Word(nums)
    INTEGER.setParseAction(lambda x: int(x[0]))
    # Header: two integers; the rest of the line is discarded.
    header = INTEGER("species_count") + INTEGER("sequence_length") +\
        Suppress(restOfLine)
    header.setParseAction(self.set_header)
    sequence_name = Word(
        alphas + nums + "!#$%&\'*+-./;<=>?@[\\]^_`{|}~",
        max=100)
    # Take a copy and disallow line breaks in the bases
    bases = self.BASES.copy()
    bases.setWhitespaceChars(" \t")
    # First block: each line is a species name followed by its bases.
    seq_start = sequence_name("species") + bases(
        "sequence") + Suppress(LineEnd())
    seq_start.setParseAction(self.set_seq_start)
    seq_start_block = OneOrMore(seq_start)
    seq_start_block.setParseAction(self.set_start_block)
    # Continuation blocks: bases only, preceded by a blank line.
    seq_continue = bases("sequence") + Suppress(LineEnd())
    seq_continue.setParseAction(self.set_seq_continue)
    seq_continue_block = Suppress(LineEnd()) + OneOrMore(seq_continue)
    seq_continue_block.setParseAction(self.set_continue_block)
    return header + seq_start_block + ZeroOrMore(seq_continue_block)
def ifParser():
    """Build a parser for rule files: a sequence of
    '#lb=<rulename>,type=<category>' labels, each followed by a rule
    (grammar supplied by ruleParser()).  '##'-comments are ignored.
    Python 2 (print statements)."""
    comma = Literal(",").suppress()
    hash = Literal("#").suppress()
    equal = Literal("=").suppress()

    # Rules and labels
    rulename = Word (alphanums + "_")
    rulecategory = oneOf("Protocol_Rules Invariant_Rules Decomposition_Rules Intruder_Rules Init Goal")
    label = hash + Literal("lb") + equal + rulename + comma + Literal("type") + equal + rulecategory
    labeledrule = Group(label) + Group(ruleParser())

    def labeledruleAction(s,l,t):
        # Debug dump, fired for Protocol_Rules labels only
        # (t[0] is the label tokens, t[1] the rule tokens).
        if t[0][3] == "Protocol_Rules":
            print "-----------------"
            print "- Detected rule -"
            print "-----------------"
            print t[0]
            print t[1]
            print
    labeledrule.setParseAction(labeledruleAction)

    # A complete file
    parser = OneOrMore(labeledrule)
    parser.ignore("##" + restOfLine)
    return parser
def parse_string(self, string):
    '''Populate a new object from a string.

    Parsing is hard, so we're going to call out to the pyparsing
    library here.  I hope you installed it!
    '''
    from pyparsing import quotedString, restOfLine, Keyword, nestedExpr, OneOrMore, Word, Literal, removeQuotes
    gr_eq = Literal('=')
    gr_stripped_string = quotedString.copy().setParseAction( removeQuotes )
    gr_opt_quoted_string = gr_stripped_string | restOfLine

    def np(words, fn = gr_opt_quoted_string, action=None):
        # Build "<keyword> = <value>", accepting any of *words* as keyword.
        p = Keyword(words[0], caseless=True)
        for w in words[1:]:
            p = p | Keyword(w, caseless=True)
        p = p + gr_eq + fn
        p.setParseAction(action)
        return p

    gr_line = np((NAME,), action=lambda x: self._set_name(x[2]))
    # Accept one "key = value" alternative per known key.
    for key in self.NULL_KEYS:
        # NOTE(review): this compares against the id() builtin, so the
        # guard can never fire; presumably an ID constant or the string
        # 'id' was intended — confirm against NULL_KEYS' contents.
        if key == id: continue
        gr_line = gr_line | np((key,), action=self._parse_setter(key))

    gr_res = OneOrMore(gr_line)
    result = gr_res.parseString(string, parseAll=True)
    return 'Console: ' + self[NAME]
def make_grammar():
    """Creates the grammar to be used by a spec matcher."""
    # pyparsing parsers are not thread-safe, so each caller builds its
    # own (http://pyparsing.wikispaces.com/share/view/644825).
    # Token order matters: longer operators must be tried before their
    # prefixes ('==' before '=', 's<=' before 's<', 's>=' before 's>').
    op_tokens = ["==", "=", "!=", "<in>", ">=", "<=",
                 "s==", "s!=", "s<=", "s<", "s>=", "s>"]
    unary_ops = Literal(op_tokens[0])
    for tok in op_tokens[1:]:
        unary_ops = unary_ops | Literal(tok)
    or_ = Literal("<or>")

    # An atom is any non-keyword run of non-whitespace characters.
    atom = ~(unary_ops | or_) + Regex(r"\S+")
    unary = unary_ops + atom

    disjunction = OneOrMore(or_ + atom)
    # Even-numbered tokens will be '<or>', so we drop them.
    disjunction.setParseAction(lambda _s, _l, t: ["<or>"] + t[1::2])

    return disjunction | unary | atom
def parseEqun(equation):
    """Parse a chemical equation like 'H2 + O2 -> H2O'.

    Returns (lhsDict, rhsDict): each maps a species string from that side
    of the '->' to a Counter of element symbol -> atom count.
    """
    # Equation level: '+'-separated species on each side of '->'.
    cForm = Word(ascii_uppercase, ascii_uppercase + ascii_lowercase + digits)
    equnExpr = Group(ZeroOrMore(cForm + Suppress('+')) + cForm)
    lhs = equnExpr.setResultsName('lhs')
    rhs = equnExpr.setResultsName('rhs')
    chemicalEqun = lhs + "->" + rhs
    parsedEqun = chemicalEqun.parseString(equation)

    # Formula level: element symbol plus optional count (default 1).
    element = Word(ascii_uppercase, ascii_lowercase)
    integer = Word(digits).setParseAction(lambda x: int(x[0]))
    elementRef = Group(element + Optional(integer, default=1))
    chemicalFormula = OneOrMore(elementRef)

    def _count_atoms(species):
        # Tally element -> atom count for each chemical in *species*.
        # (Extracted helper: the LHS and RHS loops were duplicated verbatim.)
        counts = {}
        for chemical in species:
            counts[chemical] = Counter()
            for elem, qty in chemicalFormula.parseString(chemical):
                counts[chemical][elem] += qty
        return counts

    lhsDict = _count_atoms(parsedEqun['lhs'].asList())
    rhsDict = _count_atoms(parsedEqun['rhs'].asList())
    return lhsDict, rhsDict
def _is_running(self, host):
    """Checks if a virtual machine is running.
    @param host: name of the virtual machine.
    @return: running status.
    """
    #FIXME use domstate instead
    try:
        proc = subprocess.Popen([self.options.xen.path, 'list', '-l', host],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        output, err = proc.communicate()
        if proc.returncode != 0:
            log.debug("Xen returns error checking status for machine %s: %s" % (host, err))
            return False
        # 'xm list -l' prints an s-expression; parse it with nestedExpr
        # and look for a ('status', '2') row in the first list.
        data = OneOrMore(nestedExpr()).parseString(output)
        for row in data.asList()[0]:
            if row[0] == 'status' and row[1] == '2':
                return True
        return False
    except OSError as e:
        # BUGFIX: this formatted with `label`, an undefined name, which
        # raised a NameError whenever the OSError path was taken.
        log.warning("Xen failed to check status for machine %s: %s" % (host, e))
        return False
def main(index,line):
    """Tokenise *line* and print the resulting token list; on a lexing
    failure, print the offending line with a caret under the bad column
    and a diagnostic message.  Python 2 (print statements)."""
    # Do something with this data.
    if line is not None:
        try:
            # Token definitions: operators, function names (followed by
            # '('), variable names (not followed by '('), numbers with
            # optional sign/exponent, braces, ':=' assignment, and layout.
            operator = Regex(r'(?<![\+\-\^\*/%])[\+\-]|[\^\*/%!]')
            function = Regex(r'[a-zA-Z_][a-zA-Z0-9_]*(?=([ \t]+)?\()')
            variable = Regex(r'[+-]?[a-zA-Z_][a-zA-Z0-9_]*(?!([ \t]+)?\()')
            number = Regex(r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?')
            lbrace = Word('(')
            rbrace = Word(')')
            assign = Literal(':=')
            linebreak = Word('\n')
            skip = Word(' \t')
            lexOnly = operator | function | variable | number | lbrace \
                | rbrace | assign | linebreak | skip
            lexAllOnly = OneOrMore(lexOnly)
            print lexAllOnly.parseString(line)
            print '\n------------------------------\n'
        except ParseException, err:
            # Show the failing line and point at the error column.
            print err.line
            print " "*(err.column-1) + "^"
            print "Error en la linea, {index}, columna: {e.col} elemento no identificado".format(e=err,index=index)
            print '\n------------------------------\n'
def read_apx(path): """Generates an alias.ArgumentationFramework from an Aspartix (.apx) file. Parameters ---------- path : file or string File, directory or filename to be read. Returns ------- framework : alias ArgumentationFramework Examples -------- References ---------- http://www.dbai.tuwien.ac.at/research/project/argumentation/systempage/docu.htm """ try: from pyparsing import Word, Literal, nums, alphas, alphanums, Keyword, Group, OneOrMore, Suppress except ImportError: raise ImportError("read_apx requires pyparsing") if not isinstance(path, str): return # Define apx grammar LPAR,RPAR,DOT,COMMA = map(Suppress,"().,") arg,attack,pref,val,valpref,support = map(Keyword, "arg att pref val valpref support".split()) ID = Word(alphas, alphanums) id_pair = Group(ID + COMMA + ID) integer = Word(nums) int_pair = Group(integer + COMMA + integer) arg_cmd = (arg + LPAR + ID("arg*") + RPAR) attack_cmd = (attack + LPAR + id_pair("att*") + RPAR) pref_cmd = (pref + LPAR + id_pair("pref*") + RPAR) val_cmd = (val + LPAR + Group(ID + COMMA + integer)("val*") + RPAR) valpref_cmd = (valpref + LPAR + int_pair("valpref*") + RPAR) support_cmd = (support + LPAR + id_pair("support*") + RPAR) apx = OneOrMore((arg_cmd | attack_cmd | pref_cmd | val_cmd | valpref_cmd | support_cmd) + DOT) f = open(path, 'r') f = f.read() head, tail = ntpath.split(path) framework = al.ArgumentationFramework(tail) try: parsed = apx.parseString(f) except ParseException, e: raise al.ParsingException(e)
def __init__(self):
    """Build pyparsing extractors for the stats output (the label strings
    match RAxML-style info files — TODO confirm against callers)."""
    # Labels marking where each statistic appears in the output.
    self.ALPHA_LABEL = Regex(r"alpha\[\d+\]:")
    self.LNL_LABEL = Literal("Final GAMMA-based Score of best tree")
    self.FRQ_LABEL = Regex(r"Base frequencies: (?=\d+)") ^ Regex(r"ML estimate base freqs\[\d+\]:")
    self.NAMES_LABEL = Regex(r"Partition: \d+ with name:\s+")
    self.RATES_LABEL = Regex(r"rates\[\d+\].+?:")
    self.MODEL_LABEL = Literal("Substitution Matrix:")
    # Each extractor skips ahead to its label, suppresses it, and keeps
    # only the value(s) that follow.
    self.alpha = OneOrMore(Suppress(SkipTo(self.ALPHA_LABEL)) +
                           Suppress(self.ALPHA_LABEL) + FLOAT)
    self.lnl = Suppress(SkipTo(self.LNL_LABEL)) + \
        Suppress(self.LNL_LABEL) + FLOAT
    self.frq = OneOrMore(Group(Suppress(SkipTo(self.FRQ_LABEL)) +
                               Suppress(self.FRQ_LABEL) + OneOrMore(FLOAT)))
    self.names = OneOrMore(
        Suppress(SkipTo(self.NAMES_LABEL)) + Suppress(self.NAMES_LABEL) +
        CharsNotIn("\n") + Suppress(LineEnd())
    )
    self.rates = OneOrMore(
        Group(Suppress(SkipTo(self.RATES_LABEL)) +
              Suppress(self.RATES_LABEL) + OneOrMore(FLOAT))
    )
    self.model = Suppress(SkipTo(self.MODEL_LABEL)) + \
        Suppress(self.MODEL_LABEL) + WORD

    # Grammar for the per-partition "-f e" style report.  These locals
    # deliberately shadow some of the instance-level labels above; only
    # the combined parser is kept (as self._dash_f_e_parser).
    MODEL_LABEL = Literal("Substitution Matrix:")
    SCORE_LABEL = Literal("Final GAMMA likelihood:")
    DESC_LABEL = Literal("Model Parameters of Partition")
    NAME_LEADIN = Literal(", Name:")
    DATATYPE_LEADIN = Literal(", Type of Data:")
    ALPHA_LEADIN = Literal("alpha:")
    TREELENGTH_LEADIN = Literal("Tree-Length:")
    RATES_LABEL = Regex(r"rate \w <-> \w:")
    FREQS_LABEL = Regex(r"freq pi\(\w\):")
    model = Suppress(SkipTo(MODEL_LABEL)) + Suppress(MODEL_LABEL) + WORD
    likelihood = Suppress(SkipTo(SCORE_LABEL)) + Suppress(SCORE_LABEL) + FLOAT
    description = (
        Suppress(SkipTo(DESC_LABEL)) + Suppress(DESC_LABEL) + INT +
        Suppress(NAME_LEADIN) + SPACEDWORD +
        Suppress(DATATYPE_LEADIN) + WORD
    )
    alpha = Suppress(ALPHA_LEADIN) + FLOAT
    rates = Suppress(RATES_LABEL) + FLOAT
    freqs = Suppress(FREQS_LABEL) + FLOAT
    # One group per partition: description, alpha, tree length (dropped),
    # then the substitution rates and base frequencies.
    self._dash_f_e_parser = (
        Group(OneOrMore(model)) +
        likelihood +
        Group(
            OneOrMore(
                Group(
                    description +
                    alpha +
                    Suppress(TREELENGTH_LEADIN) +
                    Suppress(FLOAT) +
                    Group(OneOrMore(rates)) +
                    Group(OneOrMore(freqs))
                )
            )
        )
    )
def INTERFACECL_BNF():
    """\
    pyparser grammar for the yapocis interface specification.
    Inspired by an IDL parser by Paul McGuire, shipped as a demo with pyparser.
    """
    global bnf
    # The grammar is built once and cached in the module-global `bnf`.
    if not bnf:
        # punctuation
        lbrace = Literal("{")
        rbrace = Literal("}")
        lparen = Literal("(")
        rparen = Literal(")")
        dot = Literal(".")
        star = Literal("*")
        semi = Literal(";")
        # keywords
        boolean_ = Keyword("boolean")
        char_ = Keyword("char")
        complex64_ = Keyword("complex64")
        float_ = Keyword("float")
        float32_ = Keyword("float32")
        inout_ = Keyword("inout")
        interface_ = Keyword("interface")
        in_ = Keyword("in")
        int_ = Keyword("int")
        int16_ = Keyword("int16")
        int32_ = Keyword("int32")
        kernel_ = Keyword("kernel")
        out_ = Keyword("out")
        short_ = Keyword("short")
        uint16_ = Keyword("uint16")
        uint32_ = Keyword("uint32")
        void_ = Keyword("void")
        # Special keywords
        alias_ = Keyword("alias")
        as_ = Keyword("as")
        outlike_ = Keyword("outlike")
        resident_ = Keyword("resident")
        widthof_ = Keyword("widthof")
        heightof_ = Keyword("heightof")
        sizeof_ = Keyword("sizeof")

        identifier = Word( alphas, alphanums + "_" )
        # Scalar type names accepted in parameter/return positions.
        typeName = (boolean_ ^ char_ ^ int16_ ^ int32_ ^ float32_ ^
                    complex64_ ^ uint16_ ^ uint32_ ^ int_ ^ float_ ^ short_)
        # Buffer direction / introspection hints preceding a parameter.
        bufferHints = (inout_ | in_ | out_ | outlike_ | resident_ |
                       widthof_ | heightof_ | sizeof_)
        paramlist = delimitedList(
            Group(bufferHints + Optional(typeName) + Optional(star) + identifier))
        # One kernel/alias/function declaration inside an interface body.
        interfaceItem = ((kernel_^void_^alias_^typeName) + identifier +
                         Optional(Group(as_+identifier)) +
                         lparen + Optional(paramlist) + rparen + semi)
        interfaceDef = Group(interface_ + identifier + lbrace +
                             ZeroOrMore(interfaceItem) + rbrace + semi)
        moduleItem = interfaceDef
        bnf = OneOrMore( moduleItem )
        # Comments may appear anywhere and are ignored.
        singleLineComment = "//" + restOfLine
        bnf.ignore( singleLineComment )
        bnf.ignore( cStyleComment )
    return bnf
def analyzeVerse(instr):
    """Parse *instr* as one or more 'adi' units (after normalising it via
    swap_agaram) and return the result as XML; returns None on any
    failure.  Python 2 only (unicode builtin, old except syntax)."""
    swp = swap_agaram(unicode(instr))
    # Whitespace is significant for the adi grammar, so don't skip it.
    parse_syntax = OneOrMore(adi).leaveWhitespace()
    try:
        result = parse_syntax.parseString(swp, parseAll=True)
        return generateXML(result)
    except Exception,e:
        # Any parse failure yields None rather than propagating.
        return None
def parse_string(self, string):
    '''Populate a new object from a string.

    Parses a Client (file daemon) resource block and stores each
    recognised "keyword = value" directive on self.  Parsing is hard, so
    we're going to call out to the pyparsing library here.  I hope you
    installed it!
    '''
    from pyparsing import quotedString, restOfLine, Keyword, nestedExpr, OneOrMore, Word, Literal, removeQuotes, nums, replaceWith, printables
    gr_eq = Literal('=')
    gr_stripped_string = quotedString.copy().setParseAction( removeQuotes )
    gr_opt_quoted_string = gr_stripped_string | restOfLine
    gr_number = Word(nums)
    # yes/no booleans are normalised to '1'/'0'
    gr_yn = Keyword('yes', caseless=True).setParseAction(replaceWith('1')) | \
        Keyword('no', caseless=True).setParseAction(replaceWith('0'))

    def _handle_ip(*x):
        # x[2] is (keyword, '=', nested tokens); re-emit as a config line.
        a,b,c = x[2]
        return ' %s = { %s }' % (a,c[0])

    def _handle_fdaddr(*x):
        # Store the whole FDAddresses block as one indented string.
        a,b,c = x[2]
        self._set(FDADDRESSES, ' %s' % '\n '.join(c))
        return

    def np(words, fn=gr_opt_quoted_string, action=None):
        # Build "<keyword> = <value>", accepting any of *words* as keyword.
        p = Keyword(words[0], caseless=True)
        for w in words[1:]:
            p = p | Keyword(w, caseless=True)
        p = p + gr_eq + fn
        p.setParseAction(action)
        return p

    gr_line = np((NAME,), action=lambda x: self._set_name(x[2]))
    gr_line = gr_line | np((ADDRESS,), action=self._parse_setter(ADDRESS))
    gr_line = gr_line | np((CATALOG,), action=self._parse_setter(CATALOG_ID, dereference=True))
    gr_line = gr_line | np((PASSWORD,), action=self._parse_setter(PASSWORD))
    gr_line = gr_line | np(PList('file retention'), action=self._parse_setter(FILERETENTION))
    gr_line = gr_line | np(PList('job retention'), action=self._parse_setter(JOBRETENTION))
    gr_line = gr_line | np((PRIORITY,), gr_number, action=self._parse_setter(PRIORITY))
    gr_line = gr_line | np(PList('working directory'), action=self._parse_setter(WORKINGDIRECTORY))
    gr_line = gr_line | np(PList('pid directory'), action=self._parse_setter(PIDDIRECTORY))
    # NOTE(review): spelled 'heart beat interval' here but 'heartbeat
    # interval' in the other resource parsers — confirm which form the
    # config files actually use.
    gr_line = gr_line | np(PList('heart beat interval'), action=self._parse_setter(HEARTBEATINTERVAL))
    gr_line = gr_line | np(PList('fd address'), action=self._parse_setter(FDADDRESS))
    gr_line = gr_line | np(PList('fd source address'), action=self._parse_setter(FDSOURCEADDRESS))
    gr_line = gr_line | np(PList('pki key pair'), action=self._parse_setter(PKIKEYPAIR))
    gr_line = gr_line | np(PList('pki master key'), action=self._parse_setter(PKIMASTERKEY))
    gr_line = gr_line | np(PList('fd port'), gr_number, action=self._parse_setter(FDPORT))
    gr_line = gr_line | np(PList('auto prune'), gr_yn, action=self._parse_setter(AUTOPRUNE))
    # BUGFIX: this directive was wired to self._parse_setter(FDPORT)
    # (copy/paste from 'fd port' above), so a MaximumConcurrentJobs line
    # silently overwrote the client's FD port instead of setting the job
    # limit.  MAXIMUMCONCURRENTJOBS matches the Director/Storage parsers.
    gr_line = gr_line | np(PList('maximum concurrent jobs'), gr_number, action=self._parse_setter(MAXIMUMCONCURRENTJOBS))
    gr_line = gr_line | np(PList('pki encryption'), gr_yn, action=self._parse_setter(PKIENCRYPTION))
    gr_line = gr_line | np(PList('pki signatures'), gr_yn, action=self._parse_setter(PKISIGNATURES))

    # This is a complicated one: FDAddresses is a nested block, e.g.
    #   FDAddresses = { IPv4 = { Addr = 1.2.3.4; Port = 9102 } }
    da_addr = np(('Addr','Port'), Word(printables), lambda x,y,z: ' '.join(z))
    da_ip = np(('IPv4','IPv6','IP'), nestedExpr('{','}', OneOrMore(da_addr).setParseAction(lambda x,y,z: ' ; '.join(z)))).setParseAction(_handle_ip)
    da_addresses = np(('fd addresses', FDADDRESSES), nestedExpr('{','}', OneOrMore(da_ip)), _handle_fdaddr)

    gr_res = OneOrMore(gr_line|da_addresses)
    result = gr_res.parseString(string, parseAll=True)
    return 'Client: ' + self[NAME]
def expression():
    """Grammar for a sequence of one or more terms; matched tokens are
    returned as a plain Python list."""
    def collect(string, location, tokens):
        # Hand the tokens back as a list rather than a ParseResults.
        return tokens.asList()

    expr = OneOrMore(term())
    expr.setName("expression")
    expr.setParseAction(collect)
    return expr
def get_highlight_expression():
    """Grammar for ``highlight:[field, field, ...]`` clauses."""
    # A single field name, possibly containing wildcards ('*') and dots.
    field = Word(srange("[a-zA-Z0-9_.*]"))
    field.setParseAction(parse_highlight_field_expression)
    # One or more fields, commas optional and discarded.
    field_list = OneOrMore(field + Optional(',').suppress())
    field_list.setParseAction(parse_highlight_expression)
    return (Word('highlight:').suppress()
            + Word('[').suppress()
            + field_list
            + Word(']').suppress())
def split_chemical_formula(formula):
    """Parse *formula* into parsed element groups of (symbol, qty).

    Brackets, dashes and dots are treated as separators and replaced
    before parsing.  A formula that is empty or purely numeric yields
    [["", 0]].  Each returned group also carries a computed "weight"
    (symbol's atomic weight times quantity, from the module-level
    atomicWeight table).
    """
    def is_number(s):
        # True when s parses as a float.
        try:
            float(s)
            return True
        except ValueError:
            return False

    def replace_things(stringg, listt, replacement):
        # Replace every occurrence of each entry of listt in stringg.
        for x in listt:
            stringg = stringg.replace(x, replacement)
        return stringg

    bad_chars = ["(", ")", "-", "."]
    formula = replace_things(formula, bad_chars, "|")

    # Degenerate inputs: nothing parseable as chemistry.
    if is_number(formula):
        return [["", 0]]
    if len(formula) == 0:
        return [["", 0]]

    # define some strings to use later, when describing valid lists
    # of characters for chemical symbols and numbers
    caps = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    lowers = caps.lower()
    digits = "0123456789"

    # Auto-convert integers, and add results names
    def convertIntegers(tokens):
        return int(tokens[0])

    element = Word(caps, lowers)
    integer = Word(digits).setParseAction(convertIntegers)
    elementRef = Group(element("symbol") + Optional(integer, default=1)("qty"))
    chemicalFormula = OneOrMore(elementRef)

    # Annotate each parsed element with its partial molecular weight.
    def computeElementWeight(tokens):
        element = tokens[0]
        element["weight"] = atomicWeight[element.symbol] * element.qty

    elementRef.setParseAction(computeElementWeight)
    formulaData = chemicalFormula.parseString(formula)
    # Cleanup: the total molecular weight used to be summed into an unused
    # local here; removed as dead code (callers only use formulaData).
    return formulaData
def manyXYZ(pathXYZ):
    """
    Read one or more molecular geometries in XYZ format from a file.

    :param: pathXYZ
    :type: string
    :return: [[AtomXYZ]]
    """
    grammar = OneOrMore(Group(parser_xyz))
    parsed = grammar.parseFile(pathXYZ)
    # One list of atoms per molecule block in the file.
    return [createAtoms(tokens) for tokens in parsed]
def load_c45_header(filename):
    """Load random variables definitions from file (in C45 format).

    File must contain information in format 'Variable Name: Values.' as in
    the example below:

        0,1.
        A: true,false.
        B: 0,1,2.
        C: c1,c2,c3,c4.
        D: one.

    The first line is related to the class object (expressed in last
    position at the output header).

    Returns a list of RandomVariable with the class variable appended
    last.  NOTE(review): Python 2 only (uses the file() builtin).
    """
    from DataStructures.randomvariables import RandomVariable
    RV = []
    # Grammar for the class line (comma-separated values only) ...
    cvariable = OneOrMore(Word(caps + lowers + digits + ".") + Optional(Suppress(","))).setResultsName("domain")
    # ... and for the 'Name: value,value,...' variable lines.
    variable = Word(caps + lowers + digits).setResultsName("name") + ": " + OneOrMore(Word(caps + lowers + digits + ".") + Optional(Suppress(","))).setResultsName("domain")
    class_variable = None
    for line in file(filename):
        # Skip comment ('#') lines and blank lines.
        if not line[0] == '#' and len(line) > 1:
            if class_variable is None:
                # First data line: the class variable's domain.
                # Slicing drops the trailing '.' and newline.
                dataline = line[0:(len(line)-2)]
                rv = cvariable.parseString(dataline)
                domain = []
                for value in rv.domain:
                    value = ''.join(value)
                    # Numeric values become ints, others stay strings.
                    if value.isdigit():
                        domain.append(int(value))
                    else:
                        domain.append(value)
                class_variable = RandomVariable('class',domain)
            else:
                dataline = line[0:(len(line)-2)]
                rv = variable.parseString(dataline)
                domain = []
                for value in rv.domain:
                    value = ''.join(value)
                    if value.isdigit():
                        domain.append(int(value))
                    else:
                        domain.append(value)
                var = RandomVariable(rv.name,domain)
                RV.append(var)
    # Class variable goes last, per the header convention above.
    RV.append(class_variable)
    return RV
def parse_list_vms(stdout, stderr):
    """Parse 'list vms' output — lines of ``"name" {uuid}`` — into a list
    of {'name': ..., 'uuid': ...} dicts."""
    name_tok = dblQuotedString(alphas).setResultsName('name')
    uuid_tok = Word(srange("[a-zA-Z0-9_\-]")).setResultsName('uuid')
    entry = Group(name_tok + Suppress("{") + uuid_tok + Suppress("}"))
    parsed = OneOrMore(entry).parseString(stdout, parseAll=True)
    vms = []
    for tok in parsed:
        # dblQuotedString keeps the quotes; strip them from the name.
        vms.append({'name': tok.name.replace('\"', ''), 'uuid': tok.uuid})
    return vms
def sdio_ids_grammar():
    """Grammar for the sdio.ids database: vendor blocks (a vendor line
    followed by TAB-indented device lines), class blocks, comments and
    blank lines."""
    vendor_line = NUM4('vendor') + text_eol('text')
    device_line = TAB + NUM4('device') + text_eol('text')
    vendor = (vendor_line('VENDOR')
              + ZeroOrMore(device_line('DEVICES*') ^ COMMENTLINE.suppress()))
    klass = klass_grammar()
    comments = OneOrMore(COMMENTLINE).suppress() ^ EMPTYLINE.suppress()
    grammar = OneOrMore(vendor('VENDORS*')
                        ^ klass('CLASSES*')
                        ^ comments) + stringEnd()
    # Tabs are structurally significant (device lines start with TAB),
    # so don't let pyparsing expand them.
    grammar.parseWithTabs()
    return grammar
def get_parser():
    """Return a section parser. @see grammar.md for the whole grammar."""
    # The parser is built lazily and cached on the Section class.
    if Section._parser is not None:
        return Section._parser
    kind = Literal("V") | Literal("C") | Literal("@")
    head = (OPEN_HEAD
            + kind.setResultsName("type")
            + INTEGER.setResultsName("id")
            + CLOSE_HEAD + EOL)
    # Body: one or more word lines.
    body = OneOrMore(WORDS + EOL)
    Section._parser = Group(head + body.setResultsName("content"))
    return Section._parser
def parse_list_ostypes(stdout, stderr):
    """Parse 'list ostypes' output — alternating ``ID:`` and
    ``Description:`` lines — into a list of {'os_type', 'os_desc'} dicts."""
    os_type = Word(alphanums + "-" + "/" + "]" + "_").\
        setResultsName('os_type')
    os_desc = Word(alphanums + "/" + " " + "(" + ")" + ".").\
        setResultsName('os_desc')
    entry = Group(Suppress(Word("ID:")) + os_type + EOL
                  + Suppress(Word("Description:")) + os_desc)
    parsed = OneOrMore(entry).parseString(stdout, parseAll=True)
    types = []
    for tok in parsed:
        types.append({'os_type': tok.os_type, 'os_desc': tok.os_desc})
    return types
def parse_string(self, string):
    '''Populate a new object from a string.

    Parses a Pool resource block, storing each recognised
    "keyword = value" directive on self via _parse_setter.  Parsing is
    delegated to the pyparsing library.
    '''
    from pyparsing import Suppress, Regex, quotedString, restOfLine, Keyword, nestedExpr, Group, OneOrMore, Word, Literal, alphanums, removeQuotes, replaceWith, nums
    gr_eq = Literal('=')
    gr_stripped_string = quotedString.copy().setParseAction( removeQuotes )
    gr_opt_quoted_string = gr_stripped_string | restOfLine
    gr_number = Word(nums)
    # yes/no booleans are normalised to '1'/'0'
    gr_yn = Keyword('yes', caseless=True).setParseAction(replaceWith('1')) | \
        Keyword('no', caseless=True).setParseAction(replaceWith('0'))

    def np(words, fn=gr_opt_quoted_string, action=None):
        # Build "<keyword> = <value>", accepting any of *words* as keyword.
        p = Keyword(words[0], caseless=True)
        for w in words[1:]:
            p = p | Keyword(w, caseless=True)
        p = p + gr_eq + fn
        p.setParseAction(action)
        return p

    gr_line = np((NAME,), action=lambda x: self._set_name(x[2]))
    # (keyword spellings, value grammar, storage key) for each directive.
    directives = [
        (PList('pool type'), gr_opt_quoted_string, POOLTYPE),
        (PList('maximum volumes'), gr_opt_quoted_string, MAXIMUMVOLUMES),
        ((STORAGE,), gr_opt_quoted_string, STORAGE),
        (PList('use volume once'), gr_yn, USEVOLUMEONCE),
        (PList('catalog files'), gr_yn, CATALOGFILES),
        (PList('auto prune'), gr_yn, AUTOPRUNE),
        ((RECYCLE,), gr_yn, RECYCLE),
        (PList('recycle oldest volume'), gr_yn, RECYCLEOLDESTVOLUME),
        (PList('recycle current volume'), gr_yn, RECYCLECURRENTVOLUME),
        (PList('purge oldest volume'), gr_yn, PURGEOLDESTVOLUME),
        (PList('maximum volume jobs'), gr_number, MAXIMUMVOLUMEJOBS),
        (PList('maximum volume files'), gr_number, MAXIMUMVOLUMEFILES),
        (PList('maximum volume bytes'), gr_opt_quoted_string, MAXIMUMVOLUMEBYTES),
        (PList('volume use duration'), gr_opt_quoted_string, VOLUMEUSEDURATION),
        (PList('volume retention'), gr_opt_quoted_string, VOLUMERETENTION),
        (PList('action on purge'), gr_opt_quoted_string, ACTIONONPURGE),
        (PList('scratch pool'), gr_opt_quoted_string, SCRATCHPOOL),
        (PList('recycle pool'), gr_opt_quoted_string, RECYCLEPOOL),
        (PList('file retention'), gr_opt_quoted_string, FILERETENTION),
        (PList('job retention'), gr_opt_quoted_string, JOBRETENTION),
        (PList('cleaning prefix'), gr_opt_quoted_string, CLEANINGPREFIX),
        (PList('label format'), gr_opt_quoted_string, LABELFORMAT),
    ]
    for words, fn, key in directives:
        gr_line = gr_line | np(words, fn, action=self._parse_setter(key))

    gr_res = OneOrMore(gr_line)
    result = gr_res.parseString(string, parseAll=True)
    return 'Pool: ' + self[NAME]
class SGF(object):
    """Minimal SGF (Smart Game Format) reader: parses a game record into a
    flat token stream and serves the tokens one at a time.
    Python 2 (print statements)."""

    def __init__(self, filename):
        # BNF: a node is ';' plus properties; a property is a short
        # alphabetic id followed by one or more '[...]' values; sequences
        # of nodes nest inside '(' ... ')' branches.
        start = Literal(";")
        text = QuotedString(quoteChar="[", escChar="\\", multiline=True,
                            unquoteResults=True, endQuoteChar="]")
        prop_id = Word(srange("[A-Za-z]"), min=1, max=10)
        prop = prop_id + Group(OneOrMore(text))
        node = ZeroOrMore(start) + OneOrMore(prop)
        sequence = OneOrMore(node)
        branch = Forward()
        branch << "(" + sequence + ZeroOrMore(branch) + ")"
        self.game = OneOrMore(branch)
        # NOTE(review): despite the parameter name, *filename* is treated
        # as the SGF text itself (parseString below, not parseFile) —
        # confirm what callers pass in.
        self.sgf_content = filename
        self.moves = None
        self.__parse()
        # Cursor for next_token().
        self.current = 0

    def next_token(self):
        # Return the next parsed token and advance the cursor.
        tok = self.moves[self.current]
        self.current += 1
        if _debug_:
            print "SGF: ", tok
        return tok

    def __parse(self):
        # Tokenise the whole game record up front.
        self.moves = self.game.parseString(self.sgf_content)

    def show(self):
        # Debug aid: dump every parsed token.
        print "All moves in %s" % self.sgf_content
        pprint(self.moves)
def __init__(self):
    '''Build pyparsing grammars for scraping a PhyML-style stats file.

    Each ``*_LABEL`` regex anchors a value in the report; the combined
    grammars skip ahead to a label, discard it, and keep only the value
    that follows.  ``WORD`` and ``FLOAT`` are module-level grammars
    defined elsewhere in this file.
    '''
    def labelled(label, value):
        # Skip everything up to `label`, consume it, then parse `value`.
        return Suppress(SkipTo(label)) + Suppress(label) + value

    self.MODEL_LABEL = Regex(r'Model of.*substitution:\s+')
    self.ALPHA_LABEL = Regex(r'Gamma shape parameter:\s+')
    self.LNL_LABEL = Regex(r'Log-likelihood:\s+')
    self.F_LABEL = Regex(r'f\(([ACGT])\)=\s+')
    self.R_LABEL = Regex(r'[ACGT]\s+<->\s+[ACGT]\s+')
    self.TSTV_LABEL = Regex(r'Transition/transversion ratio.*:\s+')

    self.model = labelled(self.MODEL_LABEL, WORD)
    self.lnl = labelled(self.LNL_LABEL, FLOAT)
    self.alpha = labelled(self.ALPHA_LABEL, FLOAT)
    # Fields present in every report, in document order.
    self.common = self.model + self.lnl + self.alpha
    self.tstv = OneOrMore(labelled(self.TSTV_LABEL, FLOAT))
    self.freq = OneOrMore(labelled(self.F_LABEL, FLOAT))
    self.rates = OneOrMore(labelled(self.R_LABEL, FLOAT))
    # Model-specific tails: GTR reports frequencies + exchange rates,
    # HKY reports ts/tv ratio(s) + frequencies.
    self.gtr_specific = Group(self.freq) + Group(self.rates)
    self.hky_specific = Group(self.tstv) + Group(self.freq)
def get_facet_expression():
    '''Build the grammar for a ``facets:[ ... ]`` clause.

    A facet is a dotted identifier, optionally followed by a
    parenthesized nested logical expression; facets are listed between
    square brackets, separated by optional commas.  Parse actions
    (defined elsewhere in this module) transform each level.
    '''
    logical_expr = get_nested_logical_expression()
    # Optional parenthesized nested expression attached to a facet name.
    # NOTE: Word('(') / Word(')') are kept from the original — Word
    # matches a *run* of those characters, not a single literal paren.
    nested_part = (Word('(').suppress()
                   + OneOrMore(logical_expr).setParseAction(parse_one_or_more_facets_expression)
                   + Word(')').suppress())
    facet = Word(srange("[a-zA-Z0-9_.]")) + Optional(nested_part)
    facet.setParseAction(parse_single_facet_expression)

    facet_list = OneOrMore(facet + Optional(',').suppress())
    facet_list.setParseAction(parse_base_facets_expression)

    return (Word('facets:').suppress()
            + Word('[').suppress()
            + facet_list
            + Word(']').suppress())
def __init__(self, fragment_file, sdkconfig):
    '''Parse a fragment file into ``self.fragments``.

    :param fragment_file: path string or an already-open file object
    :param sdkconfig: config object used to evaluate ``if``/``elif``
        conditions via ``evaluate_expression``
    :raises ParseFatalException: on any malformed fragment

    Fixes vs. the previous revision:
    - ``parse_ctx.reset()`` now resets ``parse_ctx.fragment`` (it used to
      assign a never-read ``fragment_instance`` attribute, leaving the
      previous fragment dangling).
    - error messages use ``str(e)`` instead of the Python-2-only
      ``e.message`` (AttributeError on Python 3).
    - the duplicate-key error now reports the offending key rather than
      whatever key was parsed last.
    '''
    try:
        fragment_file = open(fragment_file, "r")
    except TypeError:
        pass  # already a file-like object

    path = os.path.realpath(fragment_file.name)
    indent_stack = [1]

    class parse_ctx:
        # Shared mutable state threaded through the parse actions below.
        fragment = None      # fragment instance currently being built
        key = ""             # key currently being parsed
        keys = list()        # keys seen so far in this fragment
        key_grammar = None   # grammar descriptor for the current key

        @staticmethod
        def reset():
            # Clear all per-fragment state before a new fragment starts.
            parse_ctx.fragment = None
            parse_ctx.key = ""
            parse_ctx.keys = list()
            parse_ctx.key_grammar = None

    def fragment_type_parse_action(toks):
        parse_ctx.reset()
        # Instantiate the fragment class registered for this type name.
        parse_ctx.fragment = FRAGMENT_TYPES[toks[0]]()
        return None

    def expand_conditionals(toks, stmts):
        # Depth-first walk: plain "value" results are collected; for a
        # "conditional" result, the first branch whose condition holds is
        # expanded (an else-branch has no condition -> IndexError path).
        try:
            stmt = toks["value"]
            stmts.append(stmt)
        except KeyError:
            try:
                conditions = toks["conditional"]
                for condition in conditions:
                    try:
                        _toks = condition[1]
                        _cond = condition[0]
                        if sdkconfig.evaluate_expression(_cond):
                            expand_conditionals(_toks, stmts)
                            break
                    except IndexError:
                        # else-branch: single block, no condition
                        expand_conditionals(condition[0], stmts)
            except KeyError:
                # neither "value" nor "conditional": recurse into children
                for tok in toks:
                    expand_conditionals(tok, stmts)

    def key_body_parsed(pstr, loc, toks):
        stmts = list()
        expand_conditionals(toks, stmts)
        # Enforce the min/max value counts declared by the key grammar.
        if parse_ctx.key_grammar.min and len(stmts) < parse_ctx.key_grammar.min:
            raise ParseFatalException(
                pstr, loc,
                "fragment requires at least %d values for key '%s'" %
                (parse_ctx.key_grammar.min, parse_ctx.key))
        if parse_ctx.key_grammar.max and len(stmts) > parse_ctx.key_grammar.max:
            raise ParseFatalException(
                pstr, loc,
                "fragment requires at most %d values for key '%s'" %
                (parse_ctx.key_grammar.max, parse_ctx.key))
        try:
            parse_ctx.fragment.set_key_value(parse_ctx.key, stmts)
        except Exception as e:
            raise ParseFatalException(
                pstr, loc,
                "unable to add key '%s'; %s" % (parse_ctx.key, str(e)))
        return None

    key = Word(alphanums + "_") + Suppress(":")
    key_stmt = Forward()
    condition_block = indentedBlock(key_stmt, indent_stack)
    key_stmts = OneOrMore(condition_block)
    key_body = Suppress(key) + key_stmts
    key_body.setParseAction(key_body_parsed)

    # Conditions are captured verbatim and evaluated against sdkconfig.
    condition = originalTextFor(
        SDKConfig.get_expression_grammar()).setResultsName("condition")
    if_condition = Group(
        Suppress("if") + condition + Suppress(":") + condition_block)
    elif_condition = Group(
        Suppress("elif") + condition + Suppress(":") + condition_block)
    else_condition = Group(
        Suppress("else") + Suppress(":") + condition_block)
    conditional = (if_condition + Optional(OneOrMore(elif_condition)) +
                   Optional(else_condition)).setResultsName("conditional")

    def key_parse_action(pstr, loc, toks):
        key = toks[0]
        if key in parse_ctx.keys:
            raise ParseFatalException(
                pstr, loc, "duplicate key '%s' value definition" % key)
        parse_ctx.key = key
        parse_ctx.keys.append(key)
        try:
            parse_ctx.key_grammar = parse_ctx.fragment.get_key_grammars()[key]
            key_grammar = parse_ctx.key_grammar.grammar
        except KeyError:
            raise ParseFatalException(
                pstr, loc, "key '%s' is not supported by fragment" % key)
        except Exception as e:
            raise ParseFatalException(
                pstr, loc, "unable to parse key '%s'; %s" % (key, str(e)))
        # Bind the Forward to this key's grammar for the upcoming body.
        key_stmt << (conditional | Group(key_grammar).setResultsName("value"))
        return None

    def name_parse_action(pstr, loc, toks):
        parse_ctx.fragment.name = toks[0]

    key.setParseAction(key_parse_action)

    ftype = Word(alphas).setParseAction(fragment_type_parse_action)
    fid = Suppress(":") + Word(alphanums + "_.").setResultsName("name")
    fid.setParseAction(name_parse_action)
    header = Suppress("[") + ftype + fid + Suppress("]")

    def fragment_parse_action(pstr, loc, toks):
        # All keys marked `required` by the fragment must have appeared.
        key_grammars = parse_ctx.fragment.get_key_grammars()
        required_keys = set(
            [k for (k, v) in key_grammars.items() if v.required])
        present_keys = required_keys.intersection(set(parse_ctx.keys))
        if present_keys != required_keys:
            raise ParseFatalException(
                pstr, loc, "required keys %s for fragment not found" %
                list(required_keys - present_keys))
        return parse_ctx.fragment

    fragment_stmt = Forward()
    fragment_block = indentedBlock(fragment_stmt, indent_stack)

    fragment_if_condition = Group(
        Suppress("if") + condition + Suppress(":") + fragment_block)
    fragment_elif_condition = Group(
        Suppress("elif") + condition + Suppress(":") + fragment_block)
    fragment_else_condition = Group(
        Suppress("else") + Suppress(":") + fragment_block)
    fragment_conditional = (
        fragment_if_condition + Optional(OneOrMore(fragment_elif_condition)) +
        Optional(fragment_else_condition)).setResultsName("conditional")

    fragment = (header + OneOrMore(
        indentedBlock(key_body, indent_stack, False))).setResultsName("value")
    fragment.setParseAction(fragment_parse_action)
    fragment.ignore("#" + restOfLine)

    deprecated_mapping = DeprecatedMapping.get_fragment_grammar(
        sdkconfig, fragment_file.name).setResultsName("value")

    fragment_stmt << (Group(deprecated_mapping) | Group(fragment) |
                      Group(fragment_conditional))

    def fragment_stmt_parsed(pstr, loc, toks):
        stmts = list()
        expand_conditionals(toks, stmts)
        return stmts

    parser = ZeroOrMore(fragment_stmt)
    parser.setParseAction(fragment_stmt_parsed)

    self.fragments = parser.parseFile(fragment_file, parseAll=True)

    for fragment in self.fragments:
        fragment.path = path
def parse_query_string(self, query_string):
    # pylint: disable=too-many-locals
    """
    Parse the query string, extracting info for limit, offset, ordering,
    filters, attribute and extra projections.

    Fix vs. previous revision: the user-facing parse-error message read
    "Parser returned this massage" — typo corrected to "message".

    :param query_string: the raw string (as obtained from
        request.query_string)
    :return: parsed values for the querykeys
    :raises RestInputValidationError: on malformed query strings or bad
        datetime values
    """
    from pyparsing import Word, alphas, nums, alphanums, printables, \
        ZeroOrMore, OneOrMore, Suppress, Optional, Literal, Group, \
        QuotedString, Combine, \
        StringStart as SS, StringEnd as SE, \
        WordEnd as WE, \
        ParseException
    from pyparsing import pyparsing_common as ppc
    from dateutil import parser as dtparser
    from psycopg2.tz import FixedOffsetTimezone

    ## Define grammar
    # key types
    key = Word(alphas + '_', alphanums + '_')
    # operators (longest alternatives first so e.g. '>=' wins over '>')
    operator = (Literal('=like=') | Literal('=ilike=') |
                Literal('=in=') | Literal('=notin=') |
                Literal('=') | Literal('!=') | Literal('>=') |
                Literal('>') | Literal('<=') | Literal('<'))
    # Value types
    value_num = ppc.number
    value_bool = (Literal('true') | Literal('false')).addParseAction(
        lambda toks: bool(toks[0]))
    value_string = QuotedString('"', escQuote='""')
    value_orderby = Combine(Optional(Word('+-', exact=1)) + key)

    ## DateTimeShift value. First, compose the atomic values and then
    # combine them and convert them to datetime objects
    # Date
    value_date = Combine(
        Word(nums, exact=4) + Literal('-') + Word(nums, exact=2) +
        Literal('-') + Word(nums, exact=2))
    # Time
    value_time = Combine(
        Literal('T') + Word(nums, exact=2) +
        Optional(Literal(':') + Word(nums, exact=2)) +
        Optional(Literal(':') + Word(nums, exact=2)))
    # Shift
    value_shift = Combine(
        Word('+-', exact=1) + Word(nums, exact=2) +
        Optional(Literal(':') + Word(nums, exact=2)))
    # Combine atomic values
    value_datetime = Combine(
        value_date + Optional(value_time) + Optional(value_shift) +
        # The word must end with '&' or end of the string.
        # Adding WordEnd only here is very important: it makes the atomic
        # values for date, time and shift not really usable alone.
        WE(printables.replace('&', '')))

    ########################################################################

    def validate_time(toks):
        """
        Convert a datetime string into a DatetimePrecision.
        The format is compliant with ParseAction requirements.

        :param toks: datetime string passed in tokens
        :return: DatetimePrecision wrapping a tz-aware datetime
        """
        datetime_string = toks[0]

        # Check the precision (number of date/time components present)
        precision = len(datetime_string.replace('T', ':').split(':'))

        # Parse
        try:
            dtobj = dtparser.parse(datetime_string)
        except ValueError:
            raise RestInputValidationError(
                'time value has wrong format. The '
                'right format is '
                '<date>T<time><offset>, '
                'where <date> is expressed as '
                '[YYYY]-[MM]-[DD], '
                '<time> is expressed as [HH]:[MM]:['
                'SS], '
                '<offset> is expressed as +/-[HH]:['
                'MM] '
                'given with '
                'respect to UTC')
        if dtobj.tzinfo is not None and dtobj.utcoffset() is not None:
            tzoffset_minutes = int(dtobj.utcoffset().total_seconds() // 60)
            return DatetimePrecision(
                dtobj.replace(tzinfo=FixedOffsetTimezone(
                    offset=tzoffset_minutes, name=None)), precision)

        return DatetimePrecision(
            dtobj.replace(tzinfo=FixedOffsetTimezone(offset=0, name=None)),
            precision)

    ########################################################################

    # Convert datetime value to datetime object
    value_datetime.setParseAction(validate_time)

    # More General types
    value = (value_string | value_bool | value_datetime | value_num |
             value_orderby)
    # List of values (homogeneity of types is not checked here;
    # the query builder will do it somehow)
    value_list = Group(value + OneOrMore(Suppress(',') + value) +
                       Optional(Suppress(',')))

    # Fields
    single_field = Group(key + operator + value)
    list_field = Group(key + (Literal('=in=') | Literal('=notin=')) +
                       value_list)
    orderby_field = Group(key + Literal('=') + value_list)
    field = (list_field | orderby_field | single_field)

    # Fields separator
    separator = Suppress(Literal('&'))

    # General query string
    general_grammar = SS() + Optional(field) + ZeroOrMore(
        separator + field) + \
        Optional(separator) + SE()

    ## Parse the query string
    try:
        fields = general_grammar.parseString(query_string)

        # JQuery adds _=timestamp a parameter to not use cached data/response.
        # To handle query, remove this "_" parameter from the query string
        # For more details check issue #789
        # (https://github.com/aiidateam/aiida-core/issues/789) in aiida-core
        field_list = [
            entry for entry in fields.asList() if entry[0] != '_'
        ]
    except ParseException as err:
        raise RestInputValidationError(
            'The query string format is invalid. '
            "Parser returned this message: \"{"
            "}.\" Please notice that the column "
            'number '
            'is counted from '
            'the first character of the query '
            'string.'.format(err))

    ## return the translator instructions elaborated from the field_list
    return self.build_translator_parameters(field_list)
#import multiprocessing import itertools from pyparsing import Word, Literal, alphas, nums, alphanums, OneOrMore, Optional, SkipTo, ParseException, Group, ZeroOrMore, Suppress, Combine, delimitedList, quotedString, nestedExpr, ParseResults, oneOf, ungroup # define punctuation - reuse of expressions helps packratting work better LPAR, RPAR, LBRACK, RBRACK, COMMA, EQ = map(Literal, "()[],=") #Qualifier to go in front of type in the argument list (unsigned const int foo) qualifier = OneOrMore(oneOf('const unsigned typename struct enum')) qualifier = ungroup(qualifier.addParseAction(' '.join)) def turn_parseresults_to_list(s, loc, toks): return ParseResults(normalise_templates(toks[0].asList())) def normalise_templates(toks, isinstance=isinstance, basestring=basestring): s_list = ['<'] s_list_append = s_list.append #lookup append func once, instead of many times for tok in toks: if isinstance(tok, basestring): #See if it's a string s_list_append(' ' + tok) else: #If it's not a string s_list_append(normalise_templates(tok)) s_list_append(' >') return ''.join(s_list) #Skip pairs of brackets.
# (originally I had pyparsing pulling out the $(Macro) references from inside names # as well, but the framework doesn't work especially well without whitespace delimiters between # tokens so we just do simple find/replace in a second pass pv_name = Word(alphanums + ":._$()") pv_value = (float_number | Word(alphanums)) pv_assignment = pv_name + pv_value comment = Literal("#") + Regex(r".*") macro = Group(Word(alphas) + Literal("=").suppress() + pv_name) macros = Optional(macro + ZeroOrMore(Word(";,").suppress() + macro)) #file_include = Literal("file") + pv_name + macros file_include = Literal("file") + \ (file_name | ignored_quote + file_name + ignored_quote) \ + Optional(ignored_comma) + macros def line(contents): return LineStart() + ZeroOrMore(Group(contents)) + LineEnd().suppress() req_line = line(file_include | comment.suppress() | pv_name) req_file = OneOrMore(req_line) + StringEnd().suppress() sav_line = line(comment.suppress() | Literal("<END>").suppress() | pv_assignment) sav_file = OneOrMore(sav_line) + StringEnd().suppress()
def Verilog_BNF():
    """Build (once) and return the pyparsing grammar for Verilog source.

    The grammar is cached in the module-level `verilogbnf`; subsequent
    calls return the same parser object.  Alternation order inside the
    grammar is significant throughout — do not reorder alternatives.
    """
    global verilogbnf
    if verilogbnf is None:
        # compiler directives
        compilerDirective = Combine( "`" + \
            oneOf("define undef ifdef else endif default_nettype "
                  "include resetall timescale unconnected_drive "
                  "nounconnected_drive celldefine endcelldefine") + \
            restOfLine ).setName("compilerDirective")

        # primitives
        SEMI, COLON, LPAR, RPAR, LBRACE, RBRACE, LBRACK, RBRACK, DOT, COMMA, EQ = map(
            Literal, ";:(){}[].,=")

        identLead = alphas + "$_"
        identBody = alphanums + "$_"
        # plain (possibly hierarchical, dot-separated) identifiers
        identifier1 = Regex(r"\.?[" + identLead + "][" + identBody + r"]*(\.[" +
                            identLead + "][" + identBody + "]*)*").setName("baseIdent")
        # escaped identifiers: backslash + non-space run; the action strips the backslash
        identifier2 = Regex(r"\\\S+").setParseAction(
            lambda t: t[0][1:]).setName("escapedIdent")  #.setDebug()
        identifier = identifier1 | identifier2
        # NOTE(review): pyparsing's ParserElement.__eq__ against a string
        # performs a match test, so this is an inline sanity check.
        assert (identifier2 == r'\abc')

        hexnums = nums + "abcdefABCDEF" + "_?"
        base = Regex("'[bBoOdDhH]").setName("base")
        basedNumber = Combine(Optional(Word(nums + "_")) + base +
                              Word(hexnums + "xXzZ"),
                              joinString=" ", adjacent=False).setName("basedNumber")
        #~ number = ( basedNumber | Combine( Word( "+-"+spacedNums, spacedNums ) +
        #~ Optional( DOT + Optional( Word( spacedNums ) ) ) +
        #~ Optional( e + Word( "+-"+spacedNums, spacedNums ) ) ).setName("numeric") )
        number = ( basedNumber | \
                   Regex(r"[+-]?[0-9_]+(\.[0-9_]*)?([Ee][+-]?[0-9_]+)?") \
                 ).setName("numeric")
        #~ decnums = nums + "_"
        #~ octnums = "01234567" + "_"

        # expressions
        expr = Forward().setName("expr")
        concat = Group(LBRACE + delimitedList(expr) + RBRACE)
        multiConcat = Group("{" + expr + concat + "}").setName("multiConcat")
        funcCall = Group(identifier + LPAR + Optional(delimitedList(expr)) +
                         RPAR).setName("funcCall")

        subscrRef = Group(LBRACK + delimitedList(expr, COLON) + RBRACK)
        subscrIdentifier = Group(identifier + Optional(subscrRef))
        #~ scalarConst = "0" | (( FollowedBy('1') + oneOf("1'b0 1'b1 1'bx 1'bX 1'B0 1'B1 1'Bx 1'BX 1") ))
        scalarConst = Regex("0|1('[Bb][01xX])?")
        mintypmaxExpr = Group(expr + COLON + expr + COLON +
                              expr).setName("mintypmax")
        primary = (number |
                   (LPAR + mintypmaxExpr + RPAR) |
                   (LPAR + Group(expr) + RPAR).setName("nestedExpr") |
                   multiConcat |
                   concat |
                   dblQuotedString |
                   funcCall |
                   subscrIdentifier)

        unop = oneOf("+ - ! ~ & ~& | ^| ^ ~^").setName("unop")
        binop = oneOf("+ - * / % == != === !== && "
                      "|| < <= > >= & | ^ ^~ >> << ** <<< >>>").setName("binop")

        expr << ((unop + expr) |  # must be first!
                 (primary + "?" + expr + COLON + expr) |
                 (primary + Optional(binop + expr)))

        lvalue = subscrIdentifier | concat

        # keywords
        if_ = Keyword("if")
        else_ = Keyword("else")
        edge = Keyword("edge")
        posedge = Keyword("posedge")
        negedge = Keyword("negedge")
        specify = Keyword("specify")
        endspecify = Keyword("endspecify")
        fork = Keyword("fork")
        join = Keyword("join")
        begin = Keyword("begin")
        end = Keyword("end")
        default = Keyword("default")
        forever = Keyword("forever")
        repeat = Keyword("repeat")
        while_ = Keyword("while")
        for_ = Keyword("for")
        case = oneOf("case casez casex")
        endcase = Keyword("endcase")
        wait = Keyword("wait")
        disable = Keyword("disable")
        deassign = Keyword("deassign")
        force = Keyword("force")
        release = Keyword("release")
        assign = Keyword("assign")

        eventExpr = Forward()
        eventTerm = (posedge + expr) | (negedge + expr) | expr | (
            LPAR + eventExpr + RPAR)
        eventExpr << (Group(delimitedList(eventTerm, Keyword("or"))))
        eventControl = Group("@" + (
            (LPAR + eventExpr + RPAR) | identifier | "*")).setName("eventCtrl")

        delayArg = (number |
                    Word(alphanums + "$_") |  #identifier |
                    (LPAR + Group(delimitedList(mintypmaxExpr | expr)) +
                     RPAR)).setName("delayArg")  #.setDebug()
        delay = Group("#" + delayArg).setName("delay")  #.setDebug()
        delayOrEventControl = delay | eventControl

        assgnmt = Group(lvalue + EQ + Optional(delayOrEventControl) +
                        expr).setName("assgnmt")
        nbAssgnmt = Group((lvalue + "<=" + Optional(delay) + expr) |
                          (lvalue + "<=" + Optional(eventControl) +
                           expr)).setName("nbassgnmt")

        # NOTE: shadows the builtin `range` for the rest of this function.
        range = LBRACK + expr + COLON + expr + RBRACK

        paramAssgnmt = Group(identifier + EQ + expr).setName("paramAssgnmt")
        parameterDecl = Group("parameter" + Optional(range) +
                              delimitedList(paramAssgnmt) +
                              SEMI).setName("paramDecl")

        inputDecl = Group("input" + Optional(range) +
                          delimitedList(identifier) + SEMI)
        outputDecl = Group("output" + Optional(range) +
                           delimitedList(identifier) + SEMI)
        inoutDecl = Group("inout" + Optional(range) +
                          delimitedList(identifier) + SEMI)

        regIdentifier = Group(identifier +
                              Optional(LBRACK + expr + COLON + expr + RBRACK))
        regDecl = Group("reg" + Optional("signed") + Optional(range) +
                        delimitedList(regIdentifier) + SEMI).setName("regDecl")
        timeDecl = Group("time" + delimitedList(regIdentifier) + SEMI)
        integerDecl = Group("integer" + delimitedList(regIdentifier) + SEMI)

        strength0 = oneOf("supply0 strong0 pull0 weak0 highz0")
        strength1 = oneOf("supply1 strong1 pull1 weak1 highz1")
        driveStrength = Group(LPAR + ((strength0 + COMMA + strength1) |
                                      (strength1 + COMMA + strength0)) +
                              RPAR).setName("driveStrength")
        nettype = oneOf(
            "wire tri tri1 supply0 wand triand tri0 supply1 wor trior trireg"
        )
        expandRange = Optional(oneOf("scalared vectored")) + range

        realDecl = Group("real" + delimitedList(identifier) + SEMI)
        eventDecl = Group("event" + delimitedList(identifier) + SEMI)

        blockDecl = (parameterDecl | regDecl | integerDecl | realDecl |
                     timeDecl | eventDecl)

        stmt = Forward().setName("stmt")  #.setDebug()
        stmtOrNull = stmt | SEMI
        caseItem = ( delimitedList( expr ) + COLON + stmtOrNull ) | \
                   ( default + Optional(":") + stmtOrNull )
        stmt << Group(
            (begin + Group(ZeroOrMore(stmt)) + end).setName("begin-end") |
            (if_ + Group(LPAR + expr + RPAR) + stmtOrNull +
             Optional(else_ + stmtOrNull)).setName("if") |
            (delayOrEventControl + stmtOrNull) |
            (case + LPAR + expr + RPAR + OneOrMore(caseItem) + endcase) |
            (forever + stmt) |
            (repeat + LPAR + expr + RPAR + stmt) |
            (while_ + LPAR + expr + RPAR + stmt) |
            (for_ + LPAR + assgnmt + SEMI + Group(expr) + SEMI + assgnmt +
             RPAR + stmt) |
            (fork + ZeroOrMore(stmt) + join) |
            (fork + COLON + identifier + ZeroOrMore(blockDecl) +
             ZeroOrMore(stmt) + end) |
            (wait + LPAR + expr + RPAR + stmtOrNull) |
            ("->" + identifier + SEMI) |
            (disable + identifier + SEMI) |
            (assign + assgnmt + SEMI) |
            (deassign + lvalue + SEMI) |
            (force + assgnmt + SEMI) |
            (release + lvalue + SEMI) |
            (begin + COLON + identifier + ZeroOrMore(blockDecl) +
             ZeroOrMore(stmt) + end).setName("begin:label-end") |
            # these *have* to go at the end of the list!!!
            (assgnmt + SEMI) |
            (nbAssgnmt + SEMI) |
            (Combine(Optional("$") + identifier) +
             Optional(LPAR + delimitedList(expr | empty) + RPAR) +
             SEMI)).setName("stmtBody")
        """
        x::=<blocking_assignment> ;
        x||= <non_blocking_assignment> ;
        x||= if ( <expression> ) <statement_or_null>
        x||= if ( <expression> ) <statement_or_null> else <statement_or_null>
        x||= case ( <expression> ) <case_item>+ endcase
        x||= casez ( <expression> ) <case_item>+ endcase
        x||= casex ( <expression> ) <case_item>+ endcase
        x||= forever <statement>
        x||= repeat ( <expression> ) <statement>
        x||= while ( <expression> ) <statement>
        x||= for ( <assignment> ; <expression> ; <assignment> ) <statement>
        x||= <delay_or_event_control> <statement_or_null>
        x||= wait ( <expression> ) <statement_or_null>
        x||= -> <name_of_event> ;
        x||= <seq_block>
        x||= <par_block>
        x||= <task_enable>
        x||= <system_task_enable>
        x||= disable <name_of_task> ;
        x||= disable <name_of_block> ;
        x||= assign <assignment> ;
        x||= deassign <lvalue> ;
        x||= force <assignment> ;
        x||= release <lvalue> ;
        """
        alwaysStmt = Group("always" + Optional(eventControl) +
                           stmt).setName("alwaysStmt")
        initialStmt = Group("initial" + stmt).setName("initialStmt")

        chargeStrength = Group(LPAR + oneOf("small medium large") +
                               RPAR).setName("chargeStrength")

        continuousAssign = Group(assign + Optional(driveStrength) +
                                 Optional(delay) + delimitedList(assgnmt) +
                                 SEMI).setName("continuousAssign")

        tfDecl = (parameterDecl | inputDecl | outputDecl | inoutDecl |
                  regDecl | timeDecl | integerDecl | realDecl)

        functionDecl = Group("function" +
                             Optional(range | "integer" | "real") +
                             identifier + SEMI + Group(OneOrMore(tfDecl)) +
                             Group(ZeroOrMore(stmt)) + "endfunction")

        inputOutput = oneOf("input output")
        netDecl1Arg = (nettype + Optional(expandRange) + Optional(delay) +
                       Group(delimitedList(~inputOutput + identifier)))
        netDecl2Arg = ("trireg" + Optional(chargeStrength) +
                       Optional(expandRange) + Optional(delay) +
                       Group(delimitedList(~inputOutput + identifier)))
        netDecl3Arg = (nettype + Optional(driveStrength) +
                       Optional(expandRange) + Optional(delay) +
                       Group(delimitedList(assgnmt)))
        netDecl1 = Group(netDecl1Arg + SEMI).setName("netDecl1")
        netDecl2 = Group(netDecl2Arg + SEMI).setName("netDecl2")
        netDecl3 = Group(netDecl3Arg + SEMI).setName("netDecl3")

        gateType = oneOf("and nand or nor xor xnor buf bufif0 bufif1 "
                         "not notif0 notif1 pulldown pullup nmos rnmos "
                         "pmos rpmos cmos rcmos tran rtran tranif0 "
                         "rtranif0 tranif1 rtranif1")
        gateInstance = Optional( Group( identifier + Optional( range ) ) ) + \
                       LPAR + Group( delimitedList( expr ) ) + RPAR
        gateDecl = Group(gateType + Optional(driveStrength) +
                         Optional(delay) + delimitedList(gateInstance) + SEMI)

        udpInstance = Group(
            Group(identifier + Optional(range | subscrRef)) + LPAR +
            Group(delimitedList(expr)) + RPAR)
        # `-` makes the match after `identifier` non-backtracking
        udpInstantiation = Group(identifier - Optional(driveStrength) +
                                 Optional(delay) +
                                 delimitedList(udpInstance) +
                                 SEMI).setName("udpInstantiation")

        parameterValueAssignment = Group(
            Literal("#") + LPAR + Group(delimitedList(expr)) + RPAR)
        namedPortConnection = Group(DOT + identifier + LPAR + expr +
                                    RPAR).setName(
            "namedPortConnection")  #.setDebug()
        # NOTE(review): string == ParserElement is a match test in pyparsing.
        assert (r'.\abc (abc )' == namedPortConnection)
        modulePortConnection = expr | empty
        #~ moduleInstance = Group( Group ( identifier + Optional(range) ) +
        #~ ( delimitedList( modulePortConnection ) |
        #~ delimitedList( namedPortConnection ) ) )
        inst_args = Group(LPAR + (delimitedList(namedPortConnection) |
                                  delimitedList(modulePortConnection)) +
                          RPAR).setName("inst_args")
        moduleInstance = Group(
            Group(identifier + Optional(range)) + inst_args).setName(
            "moduleInstance")  #.setDebug()

        moduleInstantiation = Group(
            identifier + Optional(parameterValueAssignment) +
            delimitedList(moduleInstance).setName("moduleInstanceList") +
            SEMI).setName("moduleInstantiation")

        parameterOverride = Group("defparam" + delimitedList(paramAssgnmt) +
                                  SEMI)
        task = Group("task" + identifier + SEMI + ZeroOrMore(tfDecl) +
                     stmtOrNull + "endtask")

        specparamDecl = Group("specparam" + delimitedList(paramAssgnmt) +
                              SEMI)

        pathDescr1 = Group(LPAR + subscrIdentifier + "=>" +
                           subscrIdentifier + RPAR)
        pathDescr2 = Group(LPAR + Group(delimitedList(subscrIdentifier)) +
                           "*>" + Group(delimitedList(subscrIdentifier)) +
                           RPAR)
        pathDescr3 = Group(LPAR + Group(delimitedList(subscrIdentifier)) +
                           "=>" + Group(delimitedList(subscrIdentifier)) +
                           RPAR)
        pathDelayValue = Group((
            LPAR + Group(delimitedList(mintypmaxExpr | expr)) + RPAR) |
            mintypmaxExpr |
            expr)
        pathDecl = Group((pathDescr1 | pathDescr2 | pathDescr3) + EQ +
                         pathDelayValue + SEMI).setName("pathDecl")

        portConditionExpr = Forward()
        portConditionTerm = Optional(unop) + subscrIdentifier
        portConditionExpr << portConditionTerm + Optional(binop +
                                                          portConditionExpr)
        polarityOp = oneOf("+ -")
        levelSensitivePathDecl1 = Group(if_ +
                                        Group(LPAR + portConditionExpr +
                                              RPAR) + subscrIdentifier +
                                        Optional(polarityOp) + "=>" +
                                        subscrIdentifier + EQ +
                                        pathDelayValue + SEMI)
        levelSensitivePathDecl2 = Group(
            if_ + Group(LPAR + portConditionExpr + RPAR) + LPAR +
            Group(delimitedList(subscrIdentifier)) + Optional(polarityOp) +
            "*>" + Group(delimitedList(subscrIdentifier)) + RPAR + EQ +
            pathDelayValue + SEMI)
        levelSensitivePathDecl = levelSensitivePathDecl1 | levelSensitivePathDecl2

        edgeIdentifier = posedge | negedge
        edgeSensitivePathDecl1 = Group(
            Optional(if_ + Group(LPAR + expr + RPAR)) + LPAR +
            Optional(edgeIdentifier) + subscrIdentifier + "=>" + LPAR +
            subscrIdentifier + Optional(polarityOp) + COLON + expr + RPAR +
            RPAR + EQ + pathDelayValue + SEMI)
        edgeSensitivePathDecl2 = Group(
            Optional(if_ + Group(LPAR + expr + RPAR)) + LPAR +
            Optional(edgeIdentifier) + subscrIdentifier + "*>" + LPAR +
            delimitedList(subscrIdentifier) + Optional(polarityOp) + COLON +
            expr + RPAR + RPAR + EQ + pathDelayValue + SEMI)
        edgeSensitivePathDecl = edgeSensitivePathDecl1 | edgeSensitivePathDecl2

        edgeDescr = oneOf("01 10 0x x1 1x x0").setName("edgeDescr")

        timCheckEventControl = Group(posedge | negedge |
                                     (edge + LBRACK +
                                      delimitedList(edgeDescr) + RBRACK))
        timCheckCond = Forward()
        timCondBinop = oneOf("== === != !==")
        timCheckCondTerm = (expr + timCondBinop + scalarConst) | (
            Optional("~") + expr)
        timCheckCond << ((LPAR + timCheckCond + RPAR) | timCheckCondTerm)
        timCheckEvent = Group(
            Optional(timCheckEventControl) + subscrIdentifier +
            Optional("&&&" + timCheckCond))
        timCheckLimit = expr
        controlledTimingCheckEvent = Group(timCheckEventControl +
                                           subscrIdentifier +
                                           Optional("&&&" + timCheckCond))
        notifyRegister = identifier

        systemTimingCheck1 = Group("$setup" + LPAR + timCheckEvent + COMMA +
                                   timCheckEvent + COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        systemTimingCheck2 = Group("$hold" + LPAR + timCheckEvent + COMMA +
                                   timCheckEvent + COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        systemTimingCheck3 = Group("$period" + LPAR +
                                   controlledTimingCheckEvent + COMMA +
                                   timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        systemTimingCheck4 = Group("$width" + LPAR +
                                   controlledTimingCheckEvent + COMMA +
                                   timCheckLimit +
                                   Optional(COMMA + expr + COMMA +
                                            notifyRegister) + RPAR + SEMI)
        systemTimingCheck5 = Group("$skew" + LPAR + timCheckEvent + COMMA +
                                   timCheckEvent + COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        systemTimingCheck6 = Group("$recovery" + LPAR +
                                   controlledTimingCheckEvent + COMMA +
                                   timCheckEvent + COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        systemTimingCheck7 = Group("$setuphold" + LPAR + timCheckEvent +
                                   COMMA + timCheckEvent + COMMA +
                                   timCheckLimit + COMMA + timCheckLimit +
                                   Optional(COMMA + notifyRegister) + RPAR +
                                   SEMI)
        systemTimingCheck = (
            FollowedBy('$') +
            (systemTimingCheck1 | systemTimingCheck2 | systemTimingCheck3 |
             systemTimingCheck4 | systemTimingCheck5 | systemTimingCheck6 |
             systemTimingCheck7)).setName("systemTimingCheck")
        sdpd = if_ + Group(LPAR + expr + RPAR) + \
            ( pathDescr1 | pathDescr2 ) + EQ + pathDelayValue + SEMI

        specifyItem = ~Keyword("endspecify") + (
            specparamDecl | pathDecl | levelSensitivePathDecl |
            edgeSensitivePathDecl | systemTimingCheck | sdpd)
        """
        x::= <specparam_declaration>
        x||= <path_declaration>
        x||= <level_sensitive_path_declaration>
        x||= <edge_sensitive_path_declaration>
        x||= <system_timing_check>
        x||= <sdpd>
        """
        specifyBlock = Group("specify" + ZeroOrMore(specifyItem) +
                             "endspecify").setName("specifyBlock")

        moduleItem = ~Keyword("endmodule") + (
            parameterDecl | inputDecl | outputDecl | inoutDecl | regDecl |
            netDecl3 | netDecl1 | netDecl2 | timeDecl | integerDecl |
            realDecl | eventDecl | gateDecl | parameterOverride |
            continuousAssign | specifyBlock | initialStmt | alwaysStmt |
            task | functionDecl |
            # these have to be at the end - they start with identifiers
            moduleInstantiation | udpInstantiation)
        """ All possible moduleItems, from Verilog grammar spec
        x::= <parameter_declaration>
        x||= <input_declaration>
        x||= <output_declaration>
        x||= <inout_declaration>
        ?||= <net_declaration>  (spec does not seem consistent for this item)
        x||= <reg_declaration>
        x||= <time_declaration>
        x||= <integer_declaration>
        x||= <real_declaration>
        x||= <event_declaration>
        x||= <gate_declaration>
        x||= <UDP_instantiation>
        x||= <module_instantiation>
        x||= <parameter_override>
        x||= <continuous_assign>
        x||= <specify_block>
        x||= <initial_statement>
        x||= <always_statement>
        x||= <task>
        x||= <function>
        """
        portRef = subscrIdentifier
        portExpr = portRef | Group(LBRACE + delimitedList(portRef) + RBRACE)
        port = portExpr | Group((DOT + identifier + LPAR + portExpr + RPAR))

        moduleHdr = Group(
            oneOf("module macromodule") + identifier + Optional(LPAR + Group(
                Optional(
                    delimitedList(
                        Group(
                            oneOf("input output") +
                            (netDecl1Arg | netDecl2Arg | netDecl3Arg)) |
                        port))) + RPAR) + SEMI).setName("moduleHdr")

        module = Group(moduleHdr + Group(ZeroOrMore(moduleItem)) +
                       "endmodule").setName("module")  #.setDebug()

        udpDecl = outputDecl | inputDecl | regDecl
        #~ udpInitVal = oneOf("1'b0 1'b1 1'bx 1'bX 1'B0 1'B1 1'Bx 1'BX 1 0 x X")
        udpInitVal = (Regex("1'[bB][01xX]") |
                      Regex("[01xX]")).setName("udpInitVal")
        udpInitialStmt = Group("initial" + identifier + EQ + udpInitVal +
                               SEMI).setName("udpInitialStmt")
        levelSymbol = oneOf("0 1 x X ? b B")
        levelInputList = Group(OneOrMore(levelSymbol).setName("levelInpList"))
        outputSymbol = oneOf("0 1 x X")
        combEntry = Group(levelInputList + COLON + outputSymbol + SEMI)
        edgeSymbol = oneOf("r R f F p P n N *")
        # NOTE: rebinds `edge` (previously Keyword("edge")) for the UDP table rules.
        edge = Group( LPAR + levelSymbol + levelSymbol + RPAR ) | \
               Group( edgeSymbol )
        edgeInputList = Group(
            ZeroOrMore(levelSymbol) + edge + ZeroOrMore(levelSymbol))
        inputList = levelInputList | edgeInputList
        seqEntry = Group(inputList + COLON + levelSymbol + COLON +
                         (outputSymbol | "-") + SEMI).setName("seqEntry")
        udpTableDefn = Group("table" + OneOrMore(combEntry | seqEntry) +
                             "endtable").setName("table")
        """
        <UDP> ::= primitive <name_of_UDP> ( <name_of_variable> <,<name_of_variable>>* ) ; <UDP_declaration>+ <UDP_initial_statement>? <table_definition> endprimitive
        """
        udp = Group("primitive" + identifier + LPAR +
                    Group(delimitedList(identifier)) + RPAR + SEMI +
                    OneOrMore(udpDecl) + Optional(udpInitialStmt) +
                    udpTableDefn + "endprimitive")

        verilogbnf = OneOrMore(module | udp) + StringEnd()

        verilogbnf.ignore(cppStyleComment)
        verilogbnf.ignore(compilerDirective)

    return verilogbnf
| Regex(long_name).setParseAction(upcaseTokens) | upkey(short_name) | Regex(short_name).setParseAction(upcaseTokens))).setResultsName( long_name) interval = (make_interval("year", "y") | make_interval("month", "month") | make_interval("week", "w") | make_interval("day", "d") | make_interval("hour", "h") | make_interval("millisecond", "ms") | make_interval("minute", "m") | make_interval("second", "s") | make_interval("microsecond", "us")) intervals = OneOrMore(interval) interval_fxn = Group( function("interval", quoted(intervals), caseless=True, optparen=True)).setResultsName("interval") ts_expression = Forward() ts_expression <<= (Group(ts_functions + oneOf("+ -") + interval_fxn).setResultsName("ts_expression") | ts_functions | Group( function("ms", Group(ts_expression), caseless=True)).setResultsName("ts_function")) value <<= Group(ts_expression | primitive | set_ | _emptyset | list_ | dict_).setName("value") var_val = value | var.setResultsName("field")
def graph_definition():
    """Build (once) and return the pyparsing grammar for the DOT graph language.

    The parser is cached in the module-global ``graphparser``; subsequent
    calls return the cached instance.  Parse actions (the ``push_*``
    callables defined elsewhere in this module) build the graph structure
    as a side effect of parsing.
    """
    global graphparser

    if not graphparser:
        # punctuation
        colon = Literal(":")
        lbrace = Literal("{")
        rbrace = Literal("}")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lparen = Literal("(")
        rparen = Literal(")")
        equals = Literal("=")
        comma = Literal(",")
        dot = Literal(".")
        slash = Literal("/")
        bslash = Literal("\\")
        star = Literal("*")
        semi = Literal(";")
        at = Literal("@")
        minus = Literal("-")

        # keywords
        strict_ = CaselessLiteral("strict")
        graph_ = CaselessLiteral("graph")
        digraph_ = CaselessLiteral("digraph")
        subgraph_ = CaselessLiteral("subgraph")
        node_ = CaselessLiteral("node")
        edge_ = CaselessLiteral("edge")

        # token definitions
        identifier = Word(alphanums + "_.").setName("identifier")
        # unquoteResults=False keeps the surrounding quotes in the output
        double_quoted_string = QuotedString(
            '"', multiline=True, unquoteResults=False)  # dblQuotedString
        alphastring_ = OneOrMore(CharsNotIn(_noncomma + ' '))

        def parse_html(s, loc, toks):
            # Re-wrap the nested <...> content as a single HTML-label token.
            return '<%s>' % ''.join(toks[0])

        opener = '<'
        closer = '>'
        html_text = nestedExpr(
            opener, closer,
            (CharsNotIn(opener + closer))).setParseAction(parse_html).leaveWhitespace()

        # An ID is any of the four DOT identifier forms (plain, HTML label,
        # quoted string, or bare non-comma string).
        ID = (
            identifier | html_text | double_quoted_string |  # .setParseAction(strip_quotes) |
            alphastring_).setName("ID")

        float_number = Combine(Optional(minus) +
                               OneOrMore(Word(nums + "."))).setName("float_number")
        righthand_id = (float_number | ID).setName("righthand_id")

        # node ports: ":port" / ":(x,y)" location, "@angle", or both
        port_angle = (at + ID).setName("port_angle")
        port_location = (OneOrMore(Group(colon + ID)) |
                         Group(colon + lparen + ID + comma + ID + rparen)).setName("port_location")
        port = (Group(port_location + Optional(port_angle)) |
                Group(port_angle + Optional(port_location))).setName("port")
        node_id = (ID + Optional(port))

        # attribute lists: [a=1, b=2][c=3] ...
        a_list = OneOrMore(ID + Optional(equals + righthand_id) +
                           Optional(comma.suppress())).setName("a_list")
        attr_list = OneOrMore(lbrack.suppress() + Optional(a_list) +
                              rbrack.suppress()).setName("attr_list")
        attr_stmt = (Group(graph_ | node_ | edge_) + attr_list).setName("attr_stmt")

        edgeop = (Literal("--") | Literal("->")).setName("edgeop")

        stmt_list = Forward()
        graph_stmt = Group(lbrace.suppress() + Optional(stmt_list) +
                           rbrace.suppress() +
                           Optional(semi.suppress())).setName("graph_stmt")

        # edges may chain: a -> b -> subgraph {...} -> c
        edge_point = Forward()
        edgeRHS = OneOrMore(edgeop + edge_point)
        edge_stmt = edge_point + edgeRHS + Optional(attr_list)

        subgraph = Group(subgraph_ + Optional(ID) + graph_stmt).setName("subgraph")

        edge_point << Group(subgraph | graph_stmt | node_id).setName('edge_point')

        node_stmt = (node_id + Optional(attr_list) +
                     Optional(semi.suppress())).setName("node_stmt")

        assignment = (ID + equals + righthand_id).setName("assignment")
        # NOTE: alternative order matters — assignment must be tried before
        # edge/node statements or "a=b" would parse as a node named "a".
        stmt = (assignment | edge_stmt | attr_stmt | subgraph | graph_stmt |
                node_stmt).setName("stmt")
        stmt_list << OneOrMore(stmt + Optional(semi.suppress()))

        graphparser = OneOrMore((Optional(strict_) +
                                 Group((graph_ | digraph_)) +
                                 Optional(ID) +
                                 graph_stmt).setResultsName("graph"))

        singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine)

        # actions — comments are ignored, push_* callbacks build the graph
        graphparser.ignore(singleLineComment)
        graphparser.ignore(cStyleComment)

        assignment.setParseAction(push_attr_list)
        a_list.setParseAction(push_attr_list)
        edge_stmt.setParseAction(push_edge_stmt)
        node_stmt.setParseAction(push_node_stmt)
        attr_stmt.setParseAction(push_default_stmt)
        subgraph.setParseAction(push_subgraph_stmt)
        graph_stmt.setParseAction(push_graph_stmt)
        graphparser.setParseAction(push_top_graph_stmt)

    return graphparser
def strParsing(self, rcg_string):
    """Parse one line of a robocup RCG log file.

    Builds (on every call) a pyparsing grammar for the supported line
    types (server/player params, show frames, playmode, team score, end
    of game, messages) and applies it to *rcg_string*.  The rcgParsing.*
    parse actions fire as side effects during parsing.

    Returns the ParseResults from ``parseString``.

    NOTE(review): many alternatives below use ``Word("literal text")``.
    pyparsing's Word takes a *character set*, not a literal, so e.g.
    ``Word(" play_on")`` matches any run of those characters rather than
    the exact string — presumably good enough for these inputs, but
    confirm against real logs before relying on it.
    """
    left_p = Literal("(")
    right_p = Literal(")")
    frame_number = Word(nums)

    teamscore_result_name = Word(alphanums)
    teamscore_result_value = Word(alphanums)
    teamscore_result_score = Word(nums)
    # This needs to be taken care of by AST because some teams have '_' in their names
    teamscore_result = (
        teamscore_result_name + "_" + teamscore_result_value +
        Optional("_" + teamscore_result_score)).setParseAction(
            rcgParsing.get_team_result)

    # Playmode
    # Playmode list
    play_mode_list = (Word(" play_on") ^ Word(" time_over") ^
                      Word(" free_kick_r") ^ Word(" free_kick_l") ^
                      Word(" indirect_free_kick_l") ^
                      Word(" indirect_free_kick_r") ^
                      Word(" kick_in_l") ^ Word(" kick_in_r") ^
                      Word(" foul_charge_r") ^ Word(" foul_charge_l") ^
                      Word(" kick_off_l") ^ Word(" kick_off_r") ^
                      Word(" corner_kick_l") ^ Word(" corner_kick_r") ^
                      Word(" offside_r") ^ Word(" offside_l") ^
                      Word(" foul_charge_l") ^ Word(" foul_charge_r") ^
                      Word(" goal_kick_l") ^ Word(" goal_kick_r") ^
                      Word(" penalty_setup_l") ^ Word(" penalty_setup_r") ^
                      Word(" penalty_ready_l") ^ Word(" penalty_ready_r") ^
                      Word(" penalty_taken_l") ^ Word(" penalty_taken_r") ^
                      Word(" penalty_miss_l") ^ Word(" penalty_miss_r") ^
                      Word(" penalty_score_r") ^ Word(" penalty_score_l"))
    play_mode = (Word("playmode ") + Word(nums) +
                 play_mode_list).setParseAction(
                     rcgParsing.goal_notification)

    # Teamname — allows '-' and '_' inside names
    team_name = Combine(
        Word(alphanums) + Optional(OneOrMore((Literal("-") | Literal("_")) +
                                             Word(alphanums))))

    # Teamscore — regular (2 numbers) vs penalty shootout (6 numbers)
    team_score = Word("team ") + Word(
        nums) + team_name + team_name + Word(nums) * 2
    team_score_penalty = Word("team ") + Word(
        nums) + team_name + team_name + Word(nums) * 6

    # Frame and ball information
    show_frame = Word("show ") + frame_number.setParseAction(
        rcgParsing.get_current_frame)
    ball = left_p + left_p + Literal(
        "b") + right_p + Word(nums + "-.") * 4 + right_p

    # Player information: ((side number) ...)
    player_number = left_p + (Word("r") ^ Word("l")) + Word(nums) + right_p

    # Player positions
    player_position = Word(alphanums + "-.")

    # Player view mode - H for high and L for low
    view_mode = left_p + Literal("v") + (
        Word("h") ^ Word("l")) + Word(nums) + right_p

    stamina = left_p + Literal("s") + Word(nums + "-.") * 4 + right_p

    # Outer flag rules
    flag_pos = Word("lrbtc", max=1)
    field_side = Word("lr", max=1)
    distance_from_center = Word(nums)
    outer_flag = flag_pos + ZeroOrMore(field_side) + distance_from_center

    # Inner flag rules
    inner_flag_pos = Word("lrc", max=1)
    inner_flag = inner_flag_pos + (Word("b") ^ Word("t"))

    # Center flag
    center_flag = Literal("c")

    flag = left_p + Literal("f") + (outer_flag ^ inner_flag ^
                                    center_flag) + right_p

    # Additional information
    additional = left_p + Literal("c") + Word(nums + "-.") * 11 + right_p

    player = left_p + player_number + ZeroOrMore(
        player_position) + view_mode + stamina + ZeroOrMore(
            flag) + additional + right_p

    # Start of game
    start = Word("ULG5")

    server_param = "server_param " + SkipTo(lineEnd)
    player_param = "player_param " + SkipTo(lineEnd)
    player_type = "player_type " + SkipTo(lineEnd)

    # End game - (msg 6000 1 "(result 201806211300 CYRUS2018_0-vs-HELIOS2018_1)")
    end_game = Word("result") + Word(nums) + teamscore_result + Suppress(
        "-vs-") + teamscore_result + Suppress(right_p) + Suppress(
            '"').setParseAction(rcgParsing.game_has_ended)
    team_graphic = (Word("team_graphic_l") ^
                    Word("team_graphic_r")) + SkipTo(lineEnd)
    msg = "msg" + frame_number + Word(nums) + Suppress('"') + Suppress(
        left_p) + (end_game | team_graphic)

    # Frame lines: a show frame is the ball plus 11 players, then 11 more
    frame_line1 = show_frame + ball + (player * 11)
    frame_line2 = (player * 11)

    read_line = start ^ (left_p +
                         (server_param ^ player_param ^ player_type ^ msg ^
                          ((frame_line1 + frame_line2) ^ play_mode ^
                           team_score ^ team_score_penalty) + right_p))

    return read_line.parseString(rcg_string)
def init_grammar(self):
    """Set up the parsing classes.

    Any changes to the grammar of the config file should be done here.
    Builds ``self.config_parser``, a pyparsing grammar for the whole
    config file (top section, [data_blocks] section, [schemes] section).
    The various ``self.set_* / self.define_* / self.check_*`` parse
    actions populate the configuration object as a side effect.
    """
    # Some syntax that we need, but don't care about
    SEMICOLON = (Suppress(";"))
    EQUALS = Suppress("=")

    # Top Section
    FILE_NAME = Word(alphas + nums + '-_.')
    alignment_def = Keyword('alignment') + EQUALS\
        + FILE_NAME + SEMICOLON
    alignment_def.setParseAction(self.set_alignment)

    tree_def = Keyword('user_tree_topology') + EQUALS\
        + FILE_NAME + SEMICOLON
    tree_def.setParseAction(self.set_user_tree)

    def simple_option(name):
        # "name = value;" option with a shared generic parse action
        opt = Keyword(name) + EQUALS +\
            Word(alphas + nums + '-_') + SEMICOLON
        opt.setParseAction(self.set_simple_option)
        return opt

    branch_def = simple_option('branchlengths')

    # model names may contain '+', spaces and '_' (e.g. "GTR+G")
    MODEL_NAME = Word(alphas + nums + '+' + ' ' + '_')
    model_list = delimitedList(MODEL_NAME)
    model_def = 'models' + EQUALS + model_list + SEMICOLON
    model_def.setParseAction(self.set_models)

    model_selection_def = simple_option("model_selection")

    top_section = alignment_def + Optional(tree_def) + branch_def + \
        model_def + model_selection_def

    # Data Block Parsing — ranges like "1-100\3" (start-end\step)
    column = Word(nums)
    block_name = Word(alphas + '_-' + nums)
    block_def = column("start") +\
        Optional(Suppress("-") + column("end")) +\
        Optional(Suppress("\\") + column("step"))
    block_def.setParseAction(self.define_range)
    block_list_def = Group(OneOrMore(Group(block_def)))

    user_subset_def = Optional("charset") + block_name("name") + \
        EQUALS + block_list_def("parts") + SEMICOLON
    user_subset_def.setParseAction(self.define_user_subset)

    block_def_list = OneOrMore(Group(user_subset_def))
    block_section = Suppress("[data_blocks]") + block_def_list
    # check_blocks runs after all subsets are parsed so it can validate
    # the whole set at once
    block_def_list.setParseAction(self.check_blocks)

    # Scheme Parsing
    scheme_name = Word(alphas + '_-' + nums)
    # Make a copy, cos we set a different action on it
    user_subset_ref = block_name.copy()
    user_subset_ref.setParseAction(self.check_block_exists)

    subset = Group(
        Suppress("(") + delimitedList(user_subset_ref("name")) +
        Suppress(")"))
    subset.setParseAction(self.define_subset_grouping)

    scheme = Group(OneOrMore(subset))
    scheme_def = scheme_name("name") + \
        EQUALS + scheme("scheme") + SEMICOLON
    scheme_def.setParseAction(self.define_scheme)
    scheme_list = OneOrMore(Group(scheme_def))

    scheme_algo = simple_option("search")
    scheme_section = \
        Suppress("[schemes]") + scheme_algo + Optional(scheme_list)

    # We've defined the grammar for each section.
    # Here we just put it all together
    self.config_parser = (top_section + block_section +
                          scheme_section + stringEnd)
def __init__(self, query: OptionalType[str]) -> None:
    """Compile *query* (a boolean search expression) and a log-line grammar.

    The query grammar supports and/or/not, parentheses, and quoted
    phrases; the pre-parsed tree is stored in ``self._query_parser``
    (``False`` when no query was given).  ``self._log_parser`` matches
    a "timestamp level plugin task message" log line.

    NOTE(review): the NOT operator below matches ``Keyword('no')``, not
    ``'not'`` — verify this is intentional (a query containing the word
    "not" would not be treated as negation).
    """
    # Maps parse-tree node names to their evaluator methods.
    self._methods = {
        'and': self.evaluate_and,
        'or': self.evaluate_or,
        'not': self.evaluate_not,
        'parenthesis': self.evaluate_parenthesis,
        'quotes': self.evaluate_quotes,
        'word': self.evaluate_word,
    }

    self.line = ''
    # Queries are case-insensitive: normalise once here.
    self.query = query.lower() if query else ''

    if self.query:
        # TODO: Cleanup
        operator_or = Forward()
        operator_word = Group(Word(alphanums)).setResultsName('word')

        # One or more words inside quotes (right-recursive list).
        operator_quotes_content = Forward()
        operator_quotes_content << (
            (operator_word + operator_quotes_content) | operator_word
        )

        operator_quotes = (
            Group(Suppress('"') + operator_quotes_content + Suppress('"')).setResultsName(
                'quotes'
            )
            | operator_word
        )

        operator_parenthesis = (
            Group((Suppress('(') + operator_or + Suppress(")"))).setResultsName('parenthesis')
            | operator_quotes
        )

        operator_not = Forward()
        operator_not << (
            Group(Suppress(Keyword('no', caseless=True)) + operator_not).setResultsName('not')
            | operator_parenthesis
        )

        operator_and = Forward()
        # Second alternative: implicit AND between adjacent terms
        # (any term not followed by an explicit 'and'/'or').
        operator_and << (
            Group(
                operator_not + Suppress(Keyword('and', caseless=True)) + operator_and
            ).setResultsName('and')
            | Group(operator_not + OneOrMore(~oneOf('and or') + operator_and)).setResultsName(
                'and'
            )
            | operator_not
        )

        operator_or << (
            Group(
                operator_and + Suppress(Keyword('or', caseless=True)) + operator_or
            ).setResultsName('or')
            | operator_and
        )

        self._query_parser = operator_or.parseString(self.query)[0]
    else:
        self._query_parser = False

    # zfill keeps date components two digits wide ("5" -> "05")
    time_cmpnt = Word(nums).setParseAction(lambda t: t[0].zfill(2))
    date = Combine(
        (time_cmpnt + '-' + time_cmpnt + '-' + time_cmpnt)
        + ' '
        + time_cmpnt + ':' + time_cmpnt + Optional(':' + time_cmpnt)
    )
    word = Word(printables)
    # A run of >=16 spaces marks an empty task column; otherwise the task
    # is the next word after at least one space.
    self._log_parser = (
        date.setResultsName('timestamp')
        + word.setResultsName('log_level')
        + word.setResultsName('plugin')
        + (
            White(min=16).setParseAction(lambda s, l, t: [t[0].strip()]).setResultsName('task')
            | (White(min=1).suppress() & word.setResultsName('task'))
        )
        + restOfLine.setResultsName('message')
    )
foreign key (student_id) references students(student_id); alter table only student_registrations add constraint classes_link foreign key (class_id) references classes(class_id); """.upper() from pyparsing import Literal, CaselessLiteral, Word, delimitedList \ ,Optional, Combine, Group, alphas, nums, alphanums, Forward \ , oneOf, sglQuotedString, OneOrMore, ZeroOrMore, CharsNotIn \ , replaceWith skobki = "(" + ZeroOrMore(CharsNotIn(")")) + ")" field_def = OneOrMore(Word(alphas,alphanums+"_\"':-") | skobki) def field_act(s,loc,tok): return ("<"+tok[0]+"> " + " ".join(tok)).replace("\"","\\\"") field_def.setParseAction(field_act) field_list_def = delimitedList( field_def ) def field_list_act(toks): return " | ".join(toks) field_list_def.setParseAction(field_list_act) create_table_def = Literal("CREATE") + "TABLE" + Word(alphas,alphanums+"_").setResultsName("tablename") + \ "("+field_list_def.setResultsName("columns")+")"+ ";"
for i in range(0, len(lstErrorText)): itemText = lstErrorText[i] itemTextPreprocess = itemText.replace('(', '_P_').replace('(', '_PC_').replace( '[', '_S_').replace(']', '_SC_') try: indexRun = 0 strPOSNLTK = '{}' while (str(strPOSNLTK) == '{}' and indexRun < maxRun): try: lstNonT = [] lstT = [] best = parser.parse(itemTextPreprocess) strParseContent = str(best.get_parser_best().ptb_parse) print(strParseContent) dataParseResult = OneOrMore( nestedExpr()).parseString(strParseContent) strPOSNLTK = walkAndGetPOSJSonByNLTK(dataParseResult, 0, lstNonT, lstT) except: traceback.print_exc() strPOSNLTK = '{}' if str(strPOSNLTK) == '{}': print('text error\n{}'.format(itemText)) indexRun = indexRun + 1 input('error here') # else: # print('success\n{}'.format(itemText)) lstItemTexts.append(itemText) lstItemPOSs.append(strPOSNLTK) except: traceback.print_exc()
# Negative assumptions which have not been disproven should incur risk. DISPROVE + ASSUMPTIONS, # Interaction Optional(ORDINAL) + SUBJECT + Optional(LATERALLY) + ACTION + EFFECT_LIST + Optional(TO_FROM + OBJECT) + Optional(Optional(BROADLY) + RISKING + THREAT_LIST) + Optional(WITH_NOTES + NOTES), # Mitigation LABEL + (IMPERATIVE ^ HAS) + BE + (IMPLEMENTED ^ VERIFIED) + ON + (DATA_LIST ^ (ALL_DATA + Optional(EXCEPT + DATA_EXCEPTIONS))) + OneOrMore( MatchFirst([ BETWEEN + ( ELEMENT_PAIR_LIST ^ (ALL_NODES + Optional(EXCEPT + ELEMENT_PAIR_EXCEPTIONS)) ), WITHIN + ( ELEMENT_LIST ^ (ALL_NODES + Optional(EXCEPT + ELEMENT_EXCEPTIONS)) ) ]) ) ] # This allows commenting out lines in the threat model file. constructs = [lineStart + c + Optional(Literal('.')) for c in list(constructs)] construct_keys = [ 'inclusion', 'element', 'datum', 'threat',
elements = ['Ac', 'Ag', 'Al', 'Am', 'Ar', 'As', 'At', 'Au', 'B', 'Ba', 'Be', 'Bh', 'Bi', 'Bk', 'Br', 'C', 'Ca', 'Cd', 'Ce', 'Cf', 'Cl', 'Cm', 'Cn', 'Co', 'Cr', 'Cs', 'Cu', 'Db', 'Ds', 'Dy', 'Er', 'Es', 'Eu', 'F', 'Fe', 'Fl', 'Fm', 'Fr', 'Ga', 'Gd', 'Ge', 'H', 'He', 'Hf', 'Hg', 'Ho', 'Hs', 'I', 'In', 'Ir', 'K', 'Kr', 'La', 'Li', 'Lr', 'Lu', 'Lv', 'Md', 'Mg', 'Mn', 'Mo', 'Mt', 'N', 'Na', 'Nb', 'Nd', 'Ne', 'Ni', 'No', 'Np', 'O', 'Os', 'P', 'Pa', 'Pb', 'Pd', 'Pm', 'Po', 'Pr', 'Pt', 'Pu', 'Ra', 'Rb', 'Re', 'Rf', 'Rg', 'Rh', 'Rn', 'Ru', 'S', 'Sb', 'Sc', 'Se', 'Sg', 'Si', 'Sm', 'Sn', 'Sr', 'Ta', 'Tb', 'Tc', 'Te', 'Th', 'Ti', 'Tl', 'Tm', 'U', 'Uuo', 'Uup', 'Uus', 'Uut', 'V', 'W', 'Xe', 'Y', 'Yb', 'Zn', 'Zr'] digits = map(str, range(10)) symbols = list("[](){}^+-/") phases = ["(s)", "(l)", "(g)", "(aq)"] tokens = reduce(lambda a, b: a ^ b, map(Literal, elements + digits + symbols + phases)) tokenizer = OneOrMore(tokens) + StringEnd() def _orjoin(l): return "'" + "' | '".join(l) + "'" ## Defines an NLTK parser for tokenized expressions grammar = """ S -> multimolecule | multimolecule '+' S multimolecule -> count molecule | molecule count -> number | number '/' number molecule -> unphased | unphased phase unphased -> group | paren_group_round | paren_group_square element -> """ + _orjoin(elements) + """ digit -> """ + _orjoin(digits) + """ phase -> """ + _orjoin(phases) + """
refer_component(components.Filter.ParsedFunctionFilter))) if DEBUG: Constraint.setName('Constraint') # Filter: FILTER = Suppress(CaselessKeyword('FILTER')) Filter = (FILTER + Constraint).setName('Filter') # GraphNode is recursively defined in terms of Collection, ObjectList, # PropertyListNotEmpty, and TriplesNode. GraphNode = Forward() if DEBUG: GraphNode.setName('GraphNode') # Collection: Collection = (LP + Group(OneOrMore(GraphNode)) + RP).setParseAction( refer_component(components.Resource.ParsedCollection)) if DEBUG: Collection.setName('Collection') # ObjectList: ObjectList = Group(GraphNode + ZeroOrMore(COMMA + GraphNode)) if DEBUG: ObjectList.setName('ObjectList') # PropertyListNotEmpty: PropertyListItem = (Verb + ObjectList).setParseAction( refer_component(components.Triples.PropertyValue)) if DEBUG: PropertyListItem.setName('PropertyListItem') PropertyListNotEmpty = Group(PropertyListItem +
def make_multiple(to_repeat):
    """Build a grammar matching *to_repeat* one or more times, joined by
    conjunction phrases ('and', ',', 'through').

    The first occurrence is captured as "head"; every subsequent
    conjunction-prefixed occurrence accumulates under "tail".
    """
    head = (to_repeat + Optional(intro_text_marker)).setResultsName("head")
    tail_item = (atomic.conj_phrases + to_repeat
                 + Optional(intro_text_marker)).setResultsName(
                     "tail", listAllMatches=True)
    return head + OneOrMore(tail_item)
def _tdb_grammar(): #pylint: disable=R0914
    """
    Convenience function for getting the pyparsing grammar of a TDB file.

    Returns a MatchFirst of one parser per supported TDB command
    (ELEMENT, SPECIES, TYPE_DEFINITION, FUNCTION, PHASE, CONSTITUENT,
    PARAMETER, plus the skip-to-end-of-line informational commands).
    """
    int_number = Word(nums).setParseAction(lambda t: [int(t[0])])
    pos_neg_int_number = Word('+-' + nums).setParseAction(
        lambda t: [int(t[0])])  # '+3' or '-2' are examples
    # matching float w/ regex is ugly but is recommended by pyparsing
    regex_after_decimal = r'([0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)'
    float_number = Regex(r'[-+]?([0-9]+\.(?!([0-9]|[eE])))|{0}'.format(regex_after_decimal)) \
        .setParseAction(lambda t: [float(t[0])])
    # symbol name, e.g., phase name, function name
    symbol_name = Word(alphanums + '_:', min=1)
    # NOTE(review): this chained assignment immediately rebinds symbol_name,
    # making the narrower definition above dead code — confirm intended.
    ref_phase_name = symbol_name = Word(alphanums + '_-:()/', min=1)
    # species name, e.g., CO2, AL, FE3+
    species_name = Word(alphanums + '+-*/_.', min=1) + Optional(Suppress('%'))
    # constituent arrays are colon-delimited
    # each subarray can be comma- or space-delimited
    constituent_array = Group(
        delimitedList(Group(OneOrMore(Optional(Suppress(',')) + species_name)), ':'))
    param_types = MatchFirst(
        [TCCommand(param_type) for param_type in TDB_PARAM_TYPES])
    # Let sympy do heavy arithmetic / algebra parsing for us
    # a convenience function will handle the piecewise details
    func_expr = (float_number | ZeroOrMore(',').setParseAction(lambda t: 0.01)) + OneOrMore(SkipTo(';') \
        + Suppress(';') + ZeroOrMore(Suppress(',')) + Optional(float_number) + \
        Suppress(Word('YNyn', exact=1) | White()))
    # ELEMENT
    cmd_element = TCCommand('ELEMENT') + Word(alphas+'/-', min=1, max=2) + Optional(Suppress(ref_phase_name)) + \
        Optional(Suppress(OneOrMore(float_number))) + LineEnd()
    # SPECIES
    cmd_species = TCCommand('SPECIES') + species_name + Group(
        OneOrMore(
            Word(alphas, min=1, max=2) + Optional(float_number, default=1.0))
    ) + Optional(Suppress('/') + pos_neg_int_number) + LineEnd()
    # TYPE_DEFINITION
    cmd_typedef = TCCommand('TYPE_DEFINITION') + \
        Suppress(White()) + CharsNotIn(' !', exact=1) + SkipTo(LineEnd())
    # FUNCTION
    cmd_function = TCCommand('FUNCTION') + symbol_name + \
        func_expr.setParseAction(_make_piecewise_ast)
    # ASSESSED_SYSTEMS
    cmd_ass_sys = TCCommand('ASSESSED_SYSTEMS') + SkipTo(LineEnd())
    # DEFINE_SYSTEM_DEFAULT
    cmd_defsysdef = TCCommand('DEFINE_SYSTEM_DEFAULT') + SkipTo(LineEnd())
    # DEFAULT_COMMAND
    cmd_defcmd = TCCommand('DEFAULT_COMMAND') + SkipTo(LineEnd())
    # DATABASE_INFO
    cmd_database_info = TCCommand('DATABASE_INFO') + SkipTo(LineEnd())
    # VERSION_DATE
    cmd_version_date = TCCommand('VERSION_DATE') + SkipTo(LineEnd())
    # REFERENCE_FILE
    cmd_reference_file = TCCommand('REFERENCE_FILE') + SkipTo(LineEnd())
    # ADD_REFERENCES
    cmd_add_ref = TCCommand('ADD_REFERENCES') + SkipTo(LineEnd())
    # LIST_OF_REFERENCES
    cmd_lor = TCCommand('LIST_OF_REFERENCES') + SkipTo(LineEnd())
    # TEMPERATURE_LIMITS
    cmd_templim = TCCommand('TEMPERATURE_LIMITS') + SkipTo(LineEnd())
    # PHASE
    cmd_phase = TCCommand('PHASE') + symbol_name + \
        Suppress(White()) + CharsNotIn(' !', min=1) + Suppress(White()) + \
        Suppress(int_number) + Group(OneOrMore(float_number)) + LineEnd()
    # CONSTITUENT
    cmd_constituent = TCCommand('CONSTITUENT') + symbol_name + \
        Suppress(White()) + Suppress(':') + constituent_array + \
        Suppress(':') + LineEnd()
    # PARAMETER
    cmd_parameter = TCCommand('PARAMETER') + param_types + \
        Suppress('(') + symbol_name + \
        Optional(Suppress('&') + Word(alphas+'/-', min=1, max=2), default=None) + \
        Suppress(',') + constituent_array + \
        Optional(Suppress(';') + int_number, default=0) + \
        Suppress(')') + func_expr.setParseAction(_make_piecewise_ast)
    # Now combine the grammar together
    all_commands = cmd_element | \
        cmd_species | \
        cmd_typedef | \
        cmd_function | \
        cmd_ass_sys | \
        cmd_defsysdef | \
        cmd_defcmd | \
        cmd_database_info | \
        cmd_version_date | \
        cmd_reference_file | \
        cmd_add_ref | \
        cmd_lor | \
        cmd_templim | \
        cmd_phase | \
        cmd_constituent | \
        cmd_parameter
    return all_commands
None, 'Interpretations', None, None, m.level2, m.level3, m.level4 ])) # Not a context as one wouldn't list these for contextual purposes multiple_comments = ( Marker("comments") + make_multiple(atomic.section + unified.depth1_p) ).setParseAction( make_par_list(lambda m: [ None, 'Interpretations', m.section, _paren_join([m.p1, m.p2, m.p3, m.p4, m.plaintext_p5, m.plaintext_p6]) ])) multiple_interp_entries = ( Marker("entries") + Marker("for") + (atomic.section + unified.depth1_p).setResultsName("head") + OneOrMore( (atomic.conj_phrases + unified.any_depth_p).setResultsName( "tail", listAllMatches=True))).setParseAction( make_par_list(lambda m: [ None, None, m.section, m.p1, m.p2, m.p3, m.p4, m. plaintext_p5, m.plaintext_p6 ])) multiple_paragraphs = ((atomic.paragraphs_marker | atomic.paragraph_marker) + make_multiple(unified.any_depth_p)).setParseAction( make_par_list(lambda m: [ m.part, None, m.section, m.p1, m.p2, m.p3, m.p4, m.plaintext_p5, m.plaintext_p6 ])) # grammar which captures all of these possibilities token_patterns = (
def _create_grammar_6_0():
    """Create the SYM 6.0 grammar.

    Returns a pyparsing grammar matching a complete SYM 6.0 file:
    FormatVersion and Title headers followed by one or more
    {ENUMS}/{SIGNALS}/{SEND}/{RECEIVE}/{SENDRECEIVE} sections.
    '//' comments are ignored.
    """
    # ';' and ':' are structural, so exclude them from generic words
    word = Word(printables.replace(';', '').replace(':', ''))
    positive_integer = Word(nums)
    number = Word(nums + '.Ee-+')
    lp = Suppress(Literal('('))
    rp = Suppress(Literal(')'))
    lb = Suppress(Literal('['))
    rb = Suppress(Literal(']'))
    # names may contain spaces-adjacent tokens; only ' ' is whitespace here
    name = Word(alphas + nums + '_-').setWhitespaceChars(' ')
    assign = Suppress(Literal('='))
    comma = Suppress(Literal(','))
    type_ = name

    # The '-' operator makes the rest of the expression mandatory once the
    # leading keyword matched (better error messages on malformed input).
    version = Group(Keyword('FormatVersion') - assign - Keyword('6.0'))
    title = Group(Keyword('Title') - assign - QuotedString('"'))

    enum_value = Group(number + assign + QuotedString('"'))
    enum = Group(
        Suppress(Keyword('Enum')) - assign - name - Suppress(lp)
        + Group(delimitedList(enum_value)) - Suppress(rp))

    # optional "/x:" attributes of a signal definition
    sig_unit = Group(Literal('/u:') + word)
    sig_factor = Group(Literal('/f:') + word)
    sig_offset = Group(Literal('/o:') + word)
    sig_min = Group(Literal('/min:') + word)
    sig_max = Group(Literal('/max:') + word)
    sig_default = Group(Literal('/d:') + word)
    sig_long_name = Group(Literal('/ln:') + word)
    sig_enum = Group(Literal('/e:') + word)

    signal = Group(
        Suppress(Keyword('Sig')) - Suppress(assign) - name - type_
        + Group(Optional(positive_integer))          # bit length
        + Group(Optional(Keyword('-m')))             # byte-order flag
        + Group(
            Optional(sig_unit)
            + Optional(sig_factor)
            + Optional(sig_offset)
            + Optional(sig_min)
            + Optional(sig_max)
            + Optional(sig_default)
            + Optional(sig_long_name)
            + Optional(sig_enum)))

    # a message/frame definition: [Name] followed by its attributes
    symbol = Group(
        Suppress(lb) - name - Suppress(rb)
        - Group(Optional(Keyword('ID') + assign + word))
        - Group(Keyword('Len') + assign + positive_integer)
        + Group(
            Optional(
                Keyword('Mux') + assign + word + positive_integer
                + comma + positive_integer + positive_integer))
        + Group(Optional(Keyword('CycleTime') + assign + positive_integer))
        + Group(Optional(Keyword('Timeout') + assign + positive_integer))
        + Group(Optional(Keyword('MinInterval') + assign + positive_integer))
        + Group(
            ZeroOrMore(Group(
                Keyword('Sig') + assign + name + positive_integer))))

    enums = Group(Keyword('{ENUMS}') + Group(ZeroOrMore(enum)))
    signals = Group(Keyword('{SIGNALS}') + Group(ZeroOrMore(signal)))
    send = Group(Keyword('{SEND}') + Group(ZeroOrMore(symbol)))
    receive = Group(Keyword('{RECEIVE}') + Group(ZeroOrMore(symbol)))
    sendreceive = Group(Keyword('{SENDRECEIVE}') + Group(ZeroOrMore(symbol)))

    section = (enums | signals | send | receive | sendreceive)

    grammar = (version - title + Group(OneOrMore(section)) + StringEnd())
    grammar.ignore(dblSlashComment)

    return grammar
def main():
    """Interactive demo: drive home-automation appliances from a menu loop.

    Defines (but does not currently use for dispatch) a small pyparsing
    grammar for "command -> device [-> argument]" events, creates one of
    each appliance, and loops reading menu choices from stdin until the
    user selects exit.
    """
    # Event grammar: "command -> device [-> argument]"
    word = Word(alphanums)
    command = Group(OneOrMore(word))
    token = Suppress("->")
    device = Group(OneOrMore(word))
    argument = Group(OneOrMore(word))
    event = command + token + device + Optional(token + argument)

    gate = Gate()
    garage = Garage()
    airco = Aircondition()
    heating = Heating()
    boiler = Boiler()
    fridge = Fridge()

    # Dispatch tables: device name -> bound method for "open"-like and
    # "close"-like actions respectively.
    open_actions = {
        'gate': gate.open,
        'garage': garage.open,
        'air condition': airco.turn_on,
        'heating': heating.turn_on,
        'boiler temperature': boiler.increase_temperature,
        'fridge temperature': fridge.increase_temperature
    }
    close_actions = {
        'gate': gate.close,
        'garage': garage.close,
        'air condition': airco.turn_off,
        'heating': heating.turn_off,
        'boiler temperature': boiler.decrease_temperature,
        'fridge temperature': fridge.decrease_temperature
    }

    # Example event strings (not consumed by the menu loop below).
    tests = ('open -> gate', 'close -> garage', 'turn on -> air condition',
             'turn off -> heating', 'increase -> boiler temperature -> 5 degrees',
             'decrease -> fridge temperature -> 2 degrees')

    while True:
        print(
            ' 1. gate |==| 2. garage |==| 3. air condition |==| 4. heating |==| 5. boiler |==| 6.fridge |==| 7.exit'
        )
        key = input('Choose object: ')
        if key == '1':
            option = input('Choose option: 1 - open, 2 - close: ')
            if option == '1':
                open_actions['gate']()
            elif option == '2':
                close_actions['gate']()
        elif key == '2':
            option = input('Choose option: 1 - open, 2 - close: ')
            if option == '1':
                open_actions['garage']()
            elif option == '2':
                close_actions['garage']()
        elif key == '3':
            option = input('Choose option: 1 - turn on, 2 - turn off: ')
            if option == '1':
                open_actions['air condition']()
            elif option == '2':
                close_actions['air condition']()
        elif key == '4':
            option = input('Choose option: 1 - turn on, 2 - turn off: ')
            if option == '1':
                open_actions['heating']()
            elif option == '2':
                close_actions['heating']()
        elif key == '5':
            option = input('Choose option: 1 - increase, 2 - decrease: ')
            if option == '1':
                t = int(input('Insert amount of temperature difference: '))
                open_actions['boiler temperature'](t)
            elif option == '2':
                t = int(input('Insert amount of temperature difference: '))
                close_actions['boiler temperature'](t)
        elif key == '6':
            option = input('Choose option: 1 - increase, 2 - decrease: ')
            if option == '1':
                t = int(input('Insert amount of temperature difference: '))
                open_actions['fridge temperature'](t)
            elif option == '2':
                t = int(input('Insert amount of temperature difference: '))
                close_actions['fridge temperature'](t)
        elif key == '7':
            exit()
        else:
            print(f'unknown option: {key}')
def create_grammar(container_ids, secret_ids):
    """Create the grammar for the editfile.

    container_ids / secret_ids are the sets of valid ids: parse actions
    reject any "#N" secret reference or CONTAINER id not in them.

    Returns a pyparsing grammar whose parse result is a list of
    ``[container_id_or_None, entries]`` pairs, where each entry is a
    ``[key, secret, note?]`` group.
    """
    from pyparsing import (nums, alphas, lineEnd, stringEnd, OneOrMore,
                           ZeroOrMore, SkipTo, Optional, And, Word,
                           CharsNotIn, Empty, QuotedString, Literal,
                           Suppress, Group, Combine, originalTextFor,
                           Forward, ParserElement, ParseException)
    # FIX: ParseException was raised by the id-validating parse actions
    # below but missing from this import list, so a bad id raised
    # NameError instead of a parse error.

    # Read from bottom to top
    whiteSpaceChars = ' \t'
    ParserElement.setDefaultWhitespaceChars(whiteSpaceChars)

    word = Empty() + CharsNotIn(whiteSpaceChars + '\n')
    quotedString = QuotedString(quoteChar='"', escChar='\\').setParseAction(
        # NOTE the second replace is a work-around for
        # pyparsing bug #68.
        # https://sourceforge.net/p/pyparsing/bugs/68/
        lambda s, l, t: t[0].replace("\\n", "\n").replace("\\\\", "\\"))

    def secretIdNumberParseAction(s, loc, tokens):
        # Validate "#N" references against the known secret ids.
        v = int(tokens[0])
        if not v in secret_ids:
            raise ParseException(s, loc, "Not a valid secret id")
        return v
    secretIdNumber = Word(nums).setParseAction(secretIdNumberParseAction)

    def containerIdParseAction(s, loc, tokens):
        # Validate CONTAINER ids against the known container ids.
        v = int(tokens[0])
        if not v in container_ids:
            raise ParseException(s, loc, "Not a valid container id")
        return v
    containerId = Word(nums).setParseAction(containerIdParseAction)

    key = quotedString | word
    secretString = ~Literal('#') + (quotedString | word)
    secretId = Suppress('#') + secretIdNumber
    secret = secretString | secretId
    note = quotedString | originalTextFor(OneOrMore(word))
    containerKeyword = Suppress('CONTAINER')
    entry = (~containerKeyword + Group(key - secret - Optional(note))
             - Suppress(lineEnd))
    comment = Suppress(lineEnd | '#' + SkipTo(lineEnd))
    line = comment | entry
    containerLine = containerKeyword + containerId + comment

    # Instead of the following recursive grammar, we could have simply used
    #
    #   containerBlock = ZeroOrMore(comment) + Group(containerLine
    #                       + Group(OneOrMore(line)))
    #   multipleContainers = OneOrMore(containerBlock)
    #
    # but then any parsing error in line will result in a "expected stringEnd"
    # or "expected CONTAINER".
    _multipleContainers_head = Forward()
    _multipleContainers_body = Forward()
    _multipleContainers_head << (stringEnd
                                 | comment + _multipleContainers_head
                                 | containerLine + _multipleContainers_body)
    _multipleContainers_body << (
        stringEnd
        | (containerLine | line) + _multipleContainers_body)
    _multipleContainers_entry = And([entry])
    multipleContainers = And([_multipleContainers_head])

    # TODO ibidem below
    # Container markers become [None, id] so the fold below can tell them
    # apart from entries (whose first element is never None).
    containerLine.setParseAction(lambda s, l, t: [[None, t[0]]])

    def multipleContainersParseAction(s, loc, tokens):
        # Fold the flat token stream into [containerId, entries] pairs;
        # the (None, None) sentinel flushes the last container.
        curEntries = []
        curId = None
        ret = []
        for t in tuple(tokens) + ((None, None), ):
            if t[0] is not None:
                assert curId is not None
                curEntries.append(t)
                continue
            if curId is not None:
                ret.append([curId, curEntries])
            curId = t[1]
            curEntries = []
        return ret
    multipleContainers.setParseAction(multipleContainersParseAction)

    # Single-container editfiles have no CONTAINER lines at all.
    oneContainer = ZeroOrMore(line) + stringEnd
    oneContainer.setParseAction(lambda s, l, t: [[None, t]])

    grammar = multipleContainers | oneContainer
    return grammar
def _create_grammar():
    """Create the DBC grammar.

    Returns ``OneOrMore(entry) + StringEnd()`` where entry matches any
    top-level DBC statement (VERSION, NS_, BU_, BO_/SG_ messages,
    comments, attributes, value tables, ...).  The pyparsing '-'
    operator is used after each leading keyword so errors are reported
    at the failing statement rather than backtracked away.
    """
    # ';' and ':' delimit statements, so exclude them from generic words
    word = Word(printables.replace(';', '').replace(':', ''))
    integer = Group(Optional('-') + Word(nums))
    positive_integer = Word(nums).setName('positive integer')
    number = Word(nums + '.Ee-+')
    colon = Suppress(Literal(':'))
    scolon = Suppress(Literal(';'))
    pipe = Suppress(Literal('|'))
    at = Suppress(Literal('@'))
    sign = Literal('+') | Literal('-')
    lp = Suppress(Literal('('))
    rp = Suppress(Literal(')'))
    lb = Suppress(Literal('['))
    rb = Suppress(Literal(']'))
    comma = Suppress(Literal(','))
    node = Word(alphas + nums + '_-').setWhitespaceChars(' ')
    frame_id = Word(nums).setName('frame id')

    version = Group(Keyword('VERSION') - QuotedString())
    version.setName(VERSION)

    symbol = Word(alphas + '_') + Suppress(LineEnd())
    symbols = Group(Keyword('NS_') - colon - Group(ZeroOrMore(symbol)))
    symbols.setName('NS_')

    # BS_ (bit timing) is obsolete; parse and drop it
    discard = Suppress(Keyword('BS_') - colon).setName('BS_')

    nodes = Group(Keyword('BU_') - colon - Group(ZeroOrMore(node)))
    nodes.setName('BU_')

    # SG_ name [mux] : start|length@byte_order sign (factor,offset) [min|max] "unit" receivers
    signal = Group(
        Keyword(SIGNAL)
        - Group(word + Optional(word))
        - colon
        - Group(positive_integer
                - pipe
                - positive_integer
                - at
                - positive_integer
                - sign)
        - Group(lp - number - comma - number - rp)
        - Group(lb - number - pipe - number - rb)
        - QuotedString()
        - Group(delimitedList(node)))
    signal.setName(SIGNAL)

    message = Group(
        Keyword(MESSAGE)
        - frame_id
        - word
        - colon
        - positive_integer
        - word
        - Group(ZeroOrMore(signal)))
    message.setName(MESSAGE)

    event = Suppress(
        Keyword(EVENT)
        - word
        - colon
        - positive_integer
        - lb
        - number
        - pipe
        - number
        - rb
        - QuotedString()
        - number
        - number
        - word
        - node
        - scolon)
    event.setName(EVENT)

    # CM_ on a signal, message, event, node, or the whole file
    comment = Group(
        Keyword(COMMENT)
        - ((Keyword(SIGNAL)
            - frame_id
            - word
            - QuotedString()
            - scolon).setName(SIGNAL)
           | (Keyword(MESSAGE)
              - frame_id
              - QuotedString()
              - scolon).setName(MESSAGE)
           | (Keyword(EVENT)
              - word
              - QuotedString()
              - scolon).setName(EVENT)
           | (Keyword(NODES)
              - word
              - QuotedString()
              - scolon).setName(NODES)
           | (QuotedString()
              - scolon).setName('QuotedString')))
    comment.setName(COMMENT)

    # BA_DEF_: value may be an enum list, a numeric range, or nothing
    attribute_definition = Group(
        Keyword(ATTRIBUTE_DEFINITION)
        - ((QuotedString())
           | (Keyword(SIGNAL)
              | Keyword(MESSAGE)
              | Keyword(EVENT)
              | Keyword(NODES))
           + QuotedString())
        - word
        - (scolon
           | (Group(ZeroOrMore(Group(
               (comma | Empty()) + QuotedString()))) + scolon)
           | (Group(ZeroOrMore(number)) + scolon)))
    attribute_definition.setName(ATTRIBUTE_DEFINITION)

    attribute_definition_default = Group(
        Keyword(ATTRIBUTE_DEFINITION_DEFAULT)
        - QuotedString()
        - (number | QuotedString())
        - scolon)
    attribute_definition_default.setName(ATTRIBUTE_DEFINITION_DEFAULT)

    attribute = Group(
        Keyword(ATTRIBUTE)
        - QuotedString()
        - Group(Optional((Keyword(MESSAGE) + frame_id)
                         | (Keyword(SIGNAL) + frame_id + word)
                         | (Keyword(NODES) + word)))
        - (QuotedString() | number)
        - scolon)
    attribute.setName(ATTRIBUTE)

    choice = Group(
        Keyword(CHOICE)
        - Group(Optional(frame_id))
        - word
        - Group(OneOrMore(Group(integer + QuotedString())))
        - scolon)
    choice.setName(CHOICE)

    value_table = Group(
        Keyword(VALUE_TABLE)
        - word
        - Group(OneOrMore(Group(integer + QuotedString())))
        - scolon)
    value_table.setName(VALUE_TABLE)

    signal_type = Group(
        Keyword(SIGNAL_TYPE)
        - frame_id
        - word
        - colon
        - positive_integer
        - scolon)
    signal_type.setName(SIGNAL_TYPE)

    signal_multiplexer_values = Group(
        Keyword(SIGNAL_MULTIPLEXER_VALUES)
        - frame_id
        - word
        - word
        - Group(delimitedList(positive_integer
                              - Suppress('-')
                              - Suppress(positive_integer)))
        - scolon)
    signal_multiplexer_values.setName(SIGNAL_MULTIPLEXER_VALUES)

    message_add_sender = Group(
        Keyword(MESSAGE_TX_NODE)
        - frame_id
        - colon
        - Group(delimitedList(node))
        - scolon)
    message_add_sender.setName(MESSAGE_TX_NODE)

    attribute_definition_rel = Group(
        Keyword(ATTRIBUTE_DEFINITION_REL)
        - (QuotedString()
           | (Keyword(NODES_REL) + QuotedString()))
        - word
        - (scolon
           | (Group(ZeroOrMore(Group(
               (comma | Empty()) + QuotedString()))) + scolon)
           | (Group(ZeroOrMore(number)) + scolon)))
    attribute_definition_rel.setName(ATTRIBUTE_DEFINITION_REL)

    attribute_definition_default_rel = Group(
        Keyword(ATTRIBUTE_DEFINITION_DEFAULT_REL)
        - QuotedString()
        - (number | QuotedString())
        - scolon)
    attribute_definition_default_rel.setName(ATTRIBUTE_DEFINITION_DEFAULT_REL)

    attribute_rel = Group(
        Keyword(ATTRIBUTE_REL)
        - QuotedString()
        - Keyword(NODES_REL)
        - word
        - Keyword(SIGNAL)
        - frame_id
        - word
        - (positive_integer | QuotedString())
        - scolon)
    attribute_rel.setName(ATTRIBUTE_REL)

    signal_group = Group(
        Keyword(SIGNAL_GROUP)
        - frame_id
        - word
        - integer
        - colon
        - OneOrMore(word)
        - scolon)
    signal_group.setName(SIGNAL_GROUP)

    entry = (message
             | comment
             | attribute
             | choice
             | attribute_definition
             | attribute_definition_default
             | attribute_rel
             | attribute_definition_rel
             | attribute_definition_default_rel
             | signal_group
             | event
             | message_add_sender
             | value_table
             | signal_type
             | signal_multiplexer_values
             | discard
             | nodes
             | symbols
             | version)

    # Convert every matched frame id to int at parse time.
    frame_id.setParseAction(lambda _s, _l, t: int(t[0]))

    return OneOrMore(entry) + StringEnd()
def input_from_blif(blif, block=None, merge_io_vectors=True):
    """Read an open BLIF file or string and populate *block* accordingly.

    Assumes the BLIF has been flattened and there is only a single module.
    Assumes that there is only one single shared clock and reset.
    Assumes that output is generated by Yosys with formals in a particular
    order.  Ignores the reset signal (which it assumes is input only to the
    flip flops).

    :param blif: an open file-like object (anything with ``.read()``) or a
        BLIF string.
    :param block: the block to populate; ``working_block(block)`` resolves
        ``None`` to the current working block.
    :param merge_io_vectors: when True, inputs/outputs named like
        ``name[0]..name[k]`` are merged into one k+1-bit Input/Output.
    :raises PyrtlError: if *blif* is neither an open file nor a string, or
        on an unknown command / logic cover in the netlist.
    """
    import pyparsing
    import six
    from pyparsing import (Word, Literal, OneOrMore, ZeroOrMore,
                           Suppress, Group, Keyword)

    block = working_block(block)

    # Accept either an open file (duck-typed via .read) or a raw string.
    try:
        blif_string = blif.read()
    except AttributeError:
        if isinstance(blif, six.string_types):
            blif_string = blif
        else:
            raise PyrtlError('input_blif expecting either open file or string')

    def SKeyword(x):
        # Keyword that is matched but dropped from the parse results.
        return Suppress(Keyword(x))

    def SLiteral(x):
        # Literal that is matched but dropped from the parse results.
        return Suppress(Literal(x))

    def twire(x):
        """ find or make wire named x and return it """
        s = block.get_wirevector_by_name(x)
        if s is None:
            s = WireVector(bitwidth=1, name=x)
        return s

    # Begin BLIF language definition
    # Characters legal at the start / in the middle of a signal name.
    # (Yosys emits names such as $abc$123, foo[3], a.b, etc.)
    signal_start = pyparsing.alphas + '$:[]_<>\\\/'
    signal_middle = pyparsing.alphas + pyparsing.nums + '$:[]_<>\\\/.'
    signal_id = Word(signal_start, signal_middle)
    header = SKeyword('.model') + signal_id('model_name')
    input_list = Group(SKeyword('.inputs') + OneOrMore(signal_id))('input_list')
    output_list = Group(SKeyword('.outputs') + OneOrMore(signal_id))('output_list')

    # A ".names" (logic cover) line: list of signals then rows of 0/1/-.
    cover_atom = Word('01-')
    cover_list = Group(ZeroOrMore(cover_atom))('cover_list')
    namesignal_list = Group(OneOrMore(signal_id))('namesignal_list')
    name_def = Group(SKeyword('.names') + namesignal_list + cover_list)('name_def')

    # asynchronous Flip-flop
    dffas_formal = (SLiteral('C=') + signal_id('C') +
                    SLiteral('R=') + signal_id('R') +
                    SLiteral('D=') + signal_id('D') +
                    SLiteral('Q=') + signal_id('Q'))
    dffas_keyword = SKeyword('$_DFF_PN0_') | SKeyword('$_DFF_PP0_')
    dffas_def = Group(SKeyword('.subckt') + dffas_keyword + dffas_formal)('dffas_def')
    # synchronous Flip-flop
    dffs_def = Group(SKeyword('.latch') +
                     signal_id('D') + signal_id('Q') +
                     SLiteral('re') + signal_id('C'))('dffs_def')

    command_def = name_def | dffas_def | dffs_def
    command_list = Group(OneOrMore(command_def))('command_list')

    footer = SKeyword('.end')
    model_def = Group(header + input_list + output_list + command_list + footer)
    model_list = OneOrMore(model_def)
    parser = model_list.ignore(pyparsing.pythonStyleComment)

    # Begin actually reading and parsing the BLIF file
    result = parser.parseString(blif_string, parseAll=True)
    # Blif file with multiple models (currently only handles one flattened models)
    assert (len(result) == 1)

    # Shared mutable state for the extract_* closures below.
    clk_set = set([])      # names recognized as the clock net
    ff_clk_set = set([])   # clock names seen on flip-flops

    def extract_inputs(model):
        # Strip a trailing "[n]" so vector bits group under one base name.
        start_names = [re.sub(r'\[([0-9]+)\]$', '', x)
                       for x in model['input_list']]
        name_counts = collections.Counter(start_names)
        for input_name in name_counts:
            bitwidth = name_counts[input_name]
            if input_name == 'clk':
                # The clock is tracked separately, not made an Input.
                clk_set.add(input_name)
            elif not merge_io_vectors or bitwidth == 1:
                block.add_wirevector(Input(bitwidth=1, name=input_name))
            else:
                # Merge the per-bit names into one wide Input and fan the
                # bits back out under their original "name[i]" wires.
                wire_in = Input(bitwidth=bitwidth, name=input_name, block=block)
                for i in range(bitwidth):
                    bit_name = input_name + '[' + str(i) + ']'
                    bit_wire = WireVector(bitwidth=1, name=bit_name, block=block)
                    bit_wire <<= wire_in[i]

    def extract_outputs(model):
        start_names = [re.sub(r'\[([0-9]+)\]$', '', x)
                       for x in model['output_list']]
        name_counts = collections.Counter(start_names)
        for output_name in name_counts:
            bitwidth = name_counts[output_name]
            if not merge_io_vectors or bitwidth == 1:
                block.add_wirevector(Output(bitwidth=1, name=output_name))
            else:
                # Gather the per-bit wires and concatenate into one Output.
                wire_out = Output(bitwidth=bitwidth, name=output_name, block=block)
                bit_list = []
                for i in range(bitwidth):
                    bit_name = output_name + '[' + str(i) + ']'
                    bit_wire = WireVector(bitwidth=1, name=bit_name, block=block)
                    bit_list.append(bit_wire)
                wire_out <<= concat(*bit_list)

    def extract_commands(model):
        # for each "command" (dff or net) in the model
        for command in model['command_list']:
            # if it is a net (specified as a cover)
            if command.getName() == 'name_def':
                extract_cover(command)
            # else if the command is a d flop flop
            elif command.getName() == 'dffas_def' or command.getName() == 'dffs_def':
                extract_flop(command)
            else:
                raise PyrtlError('unknown command type')

    def extract_cover(command):
        # Map the small set of covers Yosys emits onto PyRTL gates.
        # Only these hard-coded covers are recognized (see final else).
        netio = command['namesignal_list']
        if len(command['cover_list']) == 0:
            output_wire = twire(netio[0])
            output_wire <<= Const(0, bitwidth=1, block=block)  # const "FALSE"
        elif command['cover_list'].asList() == ['1']:
            output_wire = twire(netio[0])
            output_wire <<= Const(1, bitwidth=1, block=block)  # const "TRUE"
        elif command['cover_list'].asList() == ['1', '1']:
            # Populate clock list if one input is already a clock
            if (netio[1] in clk_set):
                clk_set.add(netio[0])
            elif (netio[0] in clk_set):
                clk_set.add(netio[1])
            else:
                output_wire = twire(netio[1])
                output_wire <<= twire(netio[0])  # simple wire
        elif command['cover_list'].asList() == ['0', '1']:
            output_wire = twire(netio[1])
            output_wire <<= ~twire(netio[0])  # not gate
        elif command['cover_list'].asList() == ['11', '1']:
            output_wire = twire(netio[2])
            output_wire <<= twire(netio[0]) & twire(netio[1])  # and gate
        elif command['cover_list'].asList() == ['00', '1']:
            output_wire = twire(netio[2])
            output_wire <<= ~(twire(netio[0]) | twire(netio[1]))  # nor gate
        elif command['cover_list'].asList() == ['1-', '1', '-1', '1']:
            output_wire = twire(netio[2])
            output_wire <<= twire(netio[0]) | twire(netio[1])  # or gate
        elif command['cover_list'].asList() == ['10', '1', '01', '1']:
            output_wire = twire(netio[2])
            output_wire <<= twire(netio[0]) ^ twire(netio[1])  # xor gate
        elif command['cover_list'].asList() == ['1-0', '1', '-11', '1']:
            output_wire = twire(netio[3])
            output_wire <<= (twire(netio[0]) & ~ twire(netio[2])) \
                | (twire(netio[1]) & twire(netio[2]))  # mux
        elif command['cover_list'].asList() == ['-00', '1', '0-0', '1']:
            output_wire = twire(netio[3])
            output_wire <<= (~twire(netio[1]) & ~twire(netio[2])) \
                | (~twire(netio[0]) & ~twire(netio[2]))
        else:
            raise PyrtlError('Blif file with unknown logic cover set "%s"'
                             '(currently gates are hard coded)'
                             % command['cover_list'])

    def extract_flop(command):
        # Record the flop's clock name for later checking.
        if (command['C'] not in ff_clk_set):
            ff_clk_set.add(command['C'])
        # Create register and assign next state to D and output to Q
        regname = command['Q'] + '_reg'
        flop = Register(bitwidth=1, name=regname)
        flop.next <<= twire(command['D'])
        flop_output = twire(command['Q'])
        flop_output <<= flop

    # Process the (single) parsed model: inputs, then outputs, then logic.
    for model in result:
        extract_inputs(model)
        extract_outputs(model)
        extract_commands(model)
# Tail of the function-call grammar (the opening of this Group(...) lies
# above this excerpt).  The two Empty() markers use parse actions returning
# the location `l`, recording the call's start/end character offsets; the
# finished group gets setRaw applied and is named 'call'.
        Empty().setParseAction(lambda s, l, t: l)('start') +
        funcname + leftParen +
        Optional(args + Optional(comma + kwargs)) +
        rightParen +
        Empty().leaveWhitespace().setParseAction(lambda s, l, t: l)('end')
).setParseAction(setRaw)('call')

# Metric pattern (aka. pathExpression)
# A path element may contain any printable character that is not a grammar
# symbol; symbols (and '=') can still appear when backslash-escaped.
# NOTE(review): ''.join over a set gives nondeterministic character order —
# harmless for Word(), but worth confirming it was intentional.
validMetricChars = ''.join((set(printables) - set(symbols)))
escapedChar = backslash + Word(symbols + '=', exact=1)
partialPathElem = Combine(OneOrMore(escapedChar | Word(validMetricChars)))

# Brace-enclosed list of alternatives, e.g. {a,b,c}
matchEnum = Combine(leftBrace +
                    delimitedList(partialPathElem, combine=True) +
                    rightBrace)

# One dotted-path component: plain text and/or {...} enums, interleaved.
pathElement = Combine(Group(partialPathElem | matchEnum) +
                      ZeroOrMore(matchEnum | partialPathElem))

# Full dot-delimited metric path.
pathExpression = delimitedList(pathElement, delim='.', combine=True)('pathExpression')
#!/usr/bin/python3 #cf. http://pyparsing.wikispaces.com/share/view/81099063 from pyparsing import Forward, Literal, OneOrMore, ParseException, Suppress, Token, Word, basestring, printables Bold = Suppress(Literal('**')) Italic = Suppress(Literal('__')) Text = OneOrMore(Word(printables)) def test(msg): parsed = TextGrammar.parseString(msg, parseAll=True) #print(parsed.dump()) print('msg: {} => tokens={} is_bold={} is_italic={}'.format( msg, parsed.text, bool(parsed.is_bold), bool(parsed.is_italic))) #------------------------------------------------------------------------------ # 1st implementation class StopOnSuffix( Token ): # cannot be a TokenConverter because .postParse does not alter loc def __init__(self, token_matcher, suffixes): super(StopOnSuffix, self).__init__() self.name = 'StopOnSuffix' self.mayReturnEmpty = token_matcher.mayReturnEmpty self.mayIndexError = token_matcher.mayIndexError self.saveAsList = token_matcher.saveAsList
# Config-file grammar.  VALUE is recursive: either a scalar or a
# brace-enclosed (possibly empty) comma-separated list of VALUEs.
VALUE = Forward()
SVALUE = FLOAT | FRACTION | INTEGER | STRING | BOOLEAN | FLAG
VALUE << (SVALUE | (LBRACE + Group(delimitedList(VALUE) | Empty()).setParseAction(
    lambda s, l, t: t.asList()) + RBRACE))

# A variable: identifier with an optional [size] suffix.
VARIABLE = (IDENTIFIER("name") + Optional(LBRACK + SIZE("size") + RBRACK))
SCALARTYPE = Or(map(Keyword, "int real logical flag char string".split()))

# struct { member; member; ... } — members are themselves declarations,
# hence the Forward.
STRUCTMEMBERS = Forward()
STRUCTTYPE = Keyword("struct") + LBRACE + STRUCTMEMBERS("members") + RBRACE
DECLARATION = ((SCALARTYPE | STRUCTTYPE)("type") +
               Optional(LBRACK + SIZE + RBRACK) + VARIABLE)
STRUCTMEMBERS << Group(ZeroOrMore(Group(DECLARATION + SEMI)))

# Declaration with optional "= value" assignment.
DECL_ASS_STMT = DECLARATION + Optional(EQUAL + VALUE("value")) + SEMI
SECTION = (Keyword('section') + IDENTIFIER('name') + LBRACE +
           Group(OneOrMore(Group(DECL_ASS_STMT)))('declarations') +
           RBRACE + SEMI)
COMMENT = '#' + restOfLine  # todo: could potentially match '#' within strings?
CONFIG = Group(OneOrMore(Group(SECTION)))
CONFIG.ignore(COMMENT)


def walk(ns, declaration, value):
    # walks through declaration tokens recursively and constructs namespace
    # (function body continues beyond this excerpt)
    if declaration.type[0] == 'struct':
        if declaration.size:
            # Sized struct: recurse once per element with a size-0 copy.
            subdecs_vals = []
            for i, v in enumerate(value):
                fake_declaration = declaration.copy()
                fake_declaration.name = i
                fake_declaration.size = 0  # prevents infinite recursion
class SqlGrammarMSSQLServer(SqlGrammar):
    """
    SQL grammar (subclass of :class:`.SqlGrammar`) implementing Microsoft
    SQL Server syntax.
    """
    # -------------------------------------------------------------------------
    # Forward declarations
    # -------------------------------------------------------------------------
    # expr and select_statement are mutually recursive with the rules below,
    # so they are declared first and filled in later with <<.
    expr = Forward()
    select_statement = Forward()

    # -------------------------------------------------------------------------
    # Keywords
    # -------------------------------------------------------------------------
    # https://msdn.microsoft.com/en-us/library/ms189822.aspx
    sql_server_reserved_words = """
ADD ALL ALTER AND ANY AS ASC AUTHORIZATION
BACKUP BEGIN BETWEEN BREAK BROWSE BULK BY
CASCADE CASE CHECK CHECKPOINT CLOSE CLUSTERED COALESCE COLLATE COLUMN
COMMIT COMPUTE CONSTRAINT CONTAINS CONTAINSTABLE CONTINUE CONVERT CREATE
CROSS CURRENT CURRENT_DATE CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER
CURSOR
DATABASE DBCC DEALLOCATE DECLARE DEFAULT DELETE DENY DESC DISK DISTINCT
DISTRIBUTED DOUBLE DROP DUMP
ELSE END ERRLVL ESCAPE EXCEPT EXEC EXECUTE EXISTS EXIT EXTERNAL
FETCH FILE FILLFACTOR FOR FOREIGN FREETEXT FREETEXTTABLE FROM FULL FUNCTION
GOTO GRANT GROUP
HAVING HOLDLOCK
IDENTITY IDENTITY_INSERT IDENTITYCOL IF IN INDEX INNER INSERT INTERSECT
INTO IS
JOIN
KEY KILL
LEFT LIKE LINENO LOAD
MERGE
NATIONAL NOCHECK NONCLUSTERED NOT NULL NULLIF
OF OFF OFFSETS ON OPEN OPENDATASOURCE OPENQUERY OPENROWSET OPENXML OPTION
OR ORDER OUTER OVER
PERCENT PIVOT PLAN PRECISION PRIMARY PRINT PROC PROCEDURE PUBLIC
RAISERROR READ READTEXT RECONFIGURE REFERENCES REPLICATION RESTORE RESTRICT
RETURN REVERT REVOKE RIGHT ROLLBACK ROWCOUNT ROWGUIDCOL RULE
SAVE SCHEMA SECURITYAUDIT SELECT SEMANTICKEYPHRASETABLE
SEMANTICSIMILARITYDETAILSTABLE SEMANTICSIMILARITYTABLE SESSION_USER SET
SETUSER SHUTDOWN SOME STATISTICS SYSTEM_USER
TABLE TABLESAMPLE TEXTSIZE THEN TO TOP TRAN TRANSACTION TRIGGER TRUNCATE
TRY_CONVERT TSEQUAL
UNION UNIQUE UNPIVOT UPDATE UPDATETEXT USE USER
VALUES VARYING VIEW
WAITFOR WHEN WHERE WHILE WITH WITHIN WRITETEXT
"""
    # ... "WITHIN GROUP" is listed, not "WITHIN", but
    odbc_reserved_words = """
ABSOLUTE ACTION ADA ADD ALL ALLOCATE ALTER AND ANY ARE AS ASC ASSERTION AT
AUTHORIZATION AVG
BEGIN BETWEEN BIT BIT_LENGTH BOTH BY
CASCADE CASCADED CASE CAST CATALOG CHAR CHAR_LENGTH CHARACTER
CHARACTER_LENGTH CHECK CLOSE COALESCE COLLATE COLLATION COLUMN COMMIT
CONNECT CONNECTION CONSTRAINT CONSTRAINTS CONTINUE CONVERT CORRESPONDING
COUNT CREATE CROSS CURRENT CURRENT_DATE CURRENT_TIME CURRENT_TIMESTAMP
CURRENT_USER CURSOR
DATE DAY DEALLOCATE DEC DECIMAL DECLARE DEFAULT DEFERRABLE DEFERRED DELETE
DESC DESCRIBE DESCRIPTOR DIAGNOSTICS DISCONNECT DISTINCT DOMAIN DOUBLE
DROP
ELSE END END-EXEC ESCAPE EXCEPT EXCEPTION EXEC EXECUTE EXISTS EXTERNAL
EXTRACT
FALSE FETCH FIRST FLOAT FOR FOREIGN FORTRAN FOUND FROM FULL
GET GLOBAL GO GOTO GRANT GROUP
HAVING HOUR
IDENTITY IMMEDIATE IN INCLUDE INDEX INDICATOR INITIALLY INNER INPUT
INSENSITIVE INSERT INT INTEGER INTERSECT INTERVAL INTO IS ISOLATION
JOIN
KEY
LANGUAGE LAST LEADING LEFT LEVEL LIKE LOCAL LOWER
MATCH MAX MIN MINUTE MODULE MONTH
NAMES NATIONAL NATURAL NCHAR NEXT NO NONE NOT NULL NULLIF NUMERIC
OCTET_LENGTH OF ON ONLY OPEN OPTION OR ORDER OUTER OUTPUT OVERLAPS
PAD PARTIAL PASCAL POSITION PRECISION PREPARE PRESERVE PRIMARY PRIOR
PRIVILEGES PROCEDURE PUBLIC
READ REAL REFERENCES RELATIVE RESTRICT REVOKE RIGHT ROLLBACK ROWS
SCHEMA SCROLL SECOND SECTION SELECT SESSION SESSION_USER SET SIZE SMALLINT
SOME SPACE SQL SQLCA SQLCODE SQLERROR SQLSTATE SQLWARNING SUBSTRING SUM
SYSTEM_USER
TABLE TEMPORARY THEN TIME TIMESTAMP TIMEZONE_HOUR TIMEZONE_MINUTE TO
TRAILING TRANSACTION TRANSLATE TRANSLATION TRIM TRUE
UNION UNIQUE UNKNOWN UPDATE UPPER USAGE USER USING
VALUE VALUES VARCHAR VARYING VIEW
WHEN WHENEVER WHERE WITH WORK WRITE
YEAR
ZONE
"""
    # ... who thought "END-EXEC" was a good one?
    # Then some more:
    # - WITH ROLLUP: https://technet.microsoft.com/en-us/library/ms189305(v=sql.90).aspx  # noqa
    # - SOUNDEX: https://msdn.microsoft.com/en-us/library/ms187384.aspx
    rnc_extra_sql_server_keywords = """
ROLLUP SOUNDEX
"""
    # NOTE(review): rnc_extra_sql_server_keywords is defined above but NOT
    # merged into sql_server_keywords below — confirm whether ROLLUP/SOUNDEX
    # should also be treated as keywords.
    sql_server_keywords = sorted(list(set(
        sql_server_reserved_words.split() +
        odbc_reserved_words.split() +
        ANSI92_RESERVED_WORD_LIST.split())))
    # log.critical(sql_server_keywords)
    keyword = make_words_regex(sql_server_keywords, caseless=True,
                               name="keyword")

    # -------------------------------------------------------------------------
    # Comments
    # -------------------------------------------------------------------------
    # https://msdn.microsoft.com/en-us/library/ff848807.aspx
    comment = ansi_comment

    # -----------------------------------------------------------------------------
    # identifier
    # -----------------------------------------------------------------------------
    # http://dev.mysql.com/doc/refman/5.7/en/identifiers.html
    bare_identifier_word = make_regex_except_words(
        r"\b[a-zA-Z0-9$_]*\b",
        ANSI92_RESERVED_WORD_LIST,
        caseless=True,
        name="bare_identifier_word")
    # SQL Server quotes identifiers with square brackets: [like this].
    identifier = (bare_identifier_word | QuotedString(
        quoteChar="[", endQuoteChar="]",
        unquoteResults=False)).setName("identifier")
    collation_name = identifier.copy()
    column_name = identifier.copy()
    column_alias = identifier.copy()
    table_name = identifier.copy()
    table_alias = identifier.copy()
    schema_name = identifier.copy()
    index_name = identifier.copy()
    function_name = identifier.copy()
    parameter_name = identifier.copy()
    database_name = identifier.copy()
    no_dot = NotAny('.')
    # Most-specific alternative first: db.schema.table | schema.table | table
    table_spec = (
        Combine(database_name + '.' + schema_name + '.' + table_name +
                no_dot) |
        Combine(schema_name + '.' + table_name + no_dot) |
        table_name + no_dot
    ).setName("table_spec")
    column_spec = (
        Combine(database_name + '.' + schema_name + '.' + table_name +
                '.' + column_name + no_dot) |
        Combine(schema_name + '.' + table_name + '.' + column_name +
                no_dot) |
        Combine(table_name + '.' + column_name + no_dot) |
        column_name + no_dot
    ).setName("column_spec")
    # I'm unsure if SQL Server allows keywords in the parts after dots, like
    # MySQL does.
    # - http://stackoverflow.com/questions/285775/how-to-deal-with-sql-column-names-that-look-like-sql-keywords  # noqa

    bind_parameter = Literal('?')

    variable = Regex(r"@[a-zA-Z0-9\.$_]+").setName("variable")

    argument_list = (delimitedList(expr).setName("arglist").setParseAction(
        ', '.join))
    # "MYFUNC(" only, no space before the parenthesis (see Combine).
    function_call = Combine(function_name + LPAR) + argument_list + RPAR

    # Not supported: index hints
    # ... http://stackoverflow.com/questions/11016935/how-can-i-force-a-query-to-not-use-a-index-on-a-given-table  # noqa

    # -----------------------------------------------------------------------------
    # CASE
    # -----------------------------------------------------------------------------
    # Two forms: "CASE input WHEN..." (simple) and "CASE WHEN..." (searched).
    case_expr = (
        (CASE + expr + OneOrMore(WHEN + expr + THEN + expr) +
         Optional(ELSE + expr) + END) |
        (CASE + OneOrMore(WHEN + expr + THEN + expr) +
         Optional(ELSE + expr) + END)
    ).setName("case_expr")

    # -----------------------------------------------------------------------------
    # Expressions
    # -----------------------------------------------------------------------------
    aggregate_function = (
        # https://msdn.microsoft.com/en-us/library/ms173454.aspx
        AVG | CHECKSUM_AGG | COUNT | COUNT_BIG | GROUPING | GROUPING_ID |
        MAX | MIN | STDEV | STDEV_P | SUM | VAR | VARP)
    expr_term = (
        INTERVAL + expr + time_unit |
        Optional(EXISTS) + LPAR + select_statement + RPAR |
        # ... e.g. mycol = EXISTS(SELECT ...)
        # ... e.g. mycol IN (SELECT ...)
        LPAR + delim_list(expr) + RPAR |  # ... e.g. mycol IN (1, 2, 3)
        case_expr |
        bind_parameter |
        variable |
        function_call |
        literal_value |
        column_spec  # not just identifier
    )
    UNARY_OP, BINARY_OP, TERNARY_OP = 1, 2, 3
    expr << infixNotation(
        expr_term,
        [
            # Having lots of operations in the list here SLOWS IT DOWN A LOT.
            # Just combine them into an ordered list.
            (COLLATE | oneOf('! - + ~'), UNARY_OP, opAssoc.RIGHT),
            (
                (
                    oneOf('^ * / %') |
                    oneOf('+ - << >> & | = <=> >= > <= < <> !=') |
                    (IS + Optional(NOT)) |
                    LIKE |
                    (Optional(NOT) + IN) |
                    SOUNDEX  # RNC; presumably at same level as LIKE
                ),
                BINARY_OP, opAssoc.LEFT),
            ((BETWEEN, AND), TERNARY_OP, opAssoc.LEFT),
            # CASE handled above (hoping precedence is not too much of a problem)
            (NOT, UNARY_OP, opAssoc.RIGHT),
            (AND | '&&' | OR | '||' | ':=', BINARY_OP, opAssoc.LEFT),
        ],
        lpar=LPAR, rpar=RPAR)
    # ignores LIKE [ESCAPE]

    # -------------------------------------------------------------------------
    # SELECT
    # -------------------------------------------------------------------------
    compound_operator = UNION + Optional(ALL | DISTINCT)

    ordering_term = (expr + Optional(COLLATE + collation_name) +
                     Optional(ASC | DESC))

    join_constraint = Optional(Group(
        (ON + expr) |
        (USING + LPAR + delim_list(column_name) + RPAR)))

    join_op = Group(
        COMMA |
        NATURAL + (Optional(LEFT | RIGHT) + Optional(OUTER)) + JOIN |
        (INNER | CROSS) + JOIN |
        Optional(LEFT | RIGHT) + Optional(OUTER) + JOIN)

    join_source = Forward()
    # NOTE(review): the '+' before (LPAR + join_source + RPAR) looks as if it
    # may have been intended as '|' (as a third alternative) — verify against
    # the sibling dialect grammar.
    single_source = ((
        table_spec.copy().setResultsName("from_tables", listAllMatches=True) +
        Optional(Optional(AS) + table_alias)
        # Optional(index_hint_list)  # not supported yet
    ) | (select_statement + Optional(AS) + table_alias) +
        (LPAR + join_source + RPAR))
    join_source << Group(
        single_source +
        ZeroOrMore(join_op + single_source + join_constraint))("join_source")
    # ... must have a Group to append to it later, it seems
    # ... but name it "join_source" here, or it gets enclosed in a further list
    #     when you name it later

    result_base = (
        # Aggregate functions: e.g. "MAX(" allowed, "MAX (" not allowed
        Combine(COUNT + LPAR) + '*' + RPAR |  # special aggregate function
        Combine(COUNT + LPAR) + DISTINCT + expr + RPAR |  # special aggregate function  # noqa
        Combine(aggregate_function + LPAR) + expr + RPAR |
        expr |
        '*' |
        Combine(table_name + '.' + '*') |
        column_spec |
        literal_value)
    result_column = (
        result_base + Optional(Optional(AS) + column_alias)
    ).setResultsName("select_columns", listAllMatches=True)

    # -------------------------------------------------------------------------
    # SELECT
    # -------------------------------------------------------------------------
    where_expr = Group(expr).setResultsName("where_expr")
    where_clause = Group(Optional(WHERE + where_expr)).setResultsName("where_clause")
    select_core = (
        SELECT +
        Optional(TOP + integer) +
        Group(Optional(ALL | DISTINCT))("select_specifier") +
        Group(delim_list(result_column))("select_expression") +
        Optional(
            FROM + join_source +
            where_clause +
            Optional(GROUP + BY +
                     delim_list(ordering_term +
                                Optional(ASC | DESC))("group_by_term") +
                     Optional(WITH + ROLLUP)) +
            Optional(HAVING + expr("having_expr"))))
    select_statement << (
        select_core +
        ZeroOrMore(compound_operator + select_core) +
        Optional(ORDER + BY +
                 delim_list(ordering_term +
                            Optional(ASC | DESC))("order_by_terms")) +
        # PROCEDURE ignored
        # rest ignored
        Optional(';'))
    select_statement.ignore(comment)

    # https://msdn.microsoft.com/en-us/library/ms175874.aspx
    # ... approximately (and conservatively):
    # Characters invalid as the first / any character of an UNQUOTED name.
    MSSQL_INVALID_FIRST_IF_UNQUOTED = re.compile(r"[^a-zA-Z_@#]")
    MSSQL_INVALID_IF_UNQUOTED = re.compile(r"[^a-zA-Z0-9_@#$]")

    def __init__(self):
        super().__init__()

    @classmethod
    def quote_identifier(cls, identifier: str) -> str:
        # SQL Server quoting style: [identifier]
        return f"[{identifier}]"

    @classmethod
    def is_quoted(cls, identifier: str) -> bool:
        return identifier.startswith("[") and identifier.endswith("]")

    @classmethod
    def requires_quoting(cls, identifier: str) -> bool:
        # True if the bare identifier contains invalid characters, starts
        # with an invalid character, or collides with a reserved keyword.
        assert identifier, "Empty identifier"
        if cls.MSSQL_INVALID_IF_UNQUOTED.search(identifier):
            return True
        firstchar = identifier[0]
        if cls.MSSQL_INVALID_FIRST_IF_UNQUOTED.search(firstchar):
            return True
        if identifier.upper() in cls.sql_server_keywords:
            return True
        return False

    @classmethod
    def get_grammar(cls) -> ParserElement:
        # Grammar (here, just SELECT)
        return cls.select_statement

    @classmethod
    def get_column_spec(cls):
        return cls.column_spec

    @classmethod
    def get_result_column(cls):
        return cls.result_column

    @classmethod
    def get_join_op(cls):
        return cls.join_op

    @classmethod
    def get_table_spec(cls):
        return cls.table_spec

    @classmethod
    def get_join_constraint(cls):
        return cls.join_constraint

    @classmethod
    def get_select_statement(cls):
        return cls.select_statement

    @classmethod
    def get_expr(cls):
        return cls.expr

    @classmethod
    def get_where_clause(cls):
        return cls.where_clause

    @classmethod
    def get_where_expr(cls):
        return cls.where_expr

    @classmethod
    def test_dialect_specific_2(cls):
        # Self-tests for the SQL Server-specific grammar pieces.
        log.info("Testing Microsoft SQL Server-specific aspects...")

        log.info("Testing quoted identifiers")
        test_succeed(cls.identifier, "[FROM]")
        test_succeed(cls.identifier, "[SELECT FROM]")

        log.info("Testing table_spec")
        # SQL Server uses up to: db.schema.table.column
        test_succeed(cls.table_spec, "mytable")
        test_succeed(cls.table_spec, "mydb.mytable")
        test_succeed(cls.table_spec, "mydb.[my silly table]")
        test_succeed(cls.table_spec, "mydb.myschema.mytable")
        test_fail(cls.table_spec, "mydb . mytable")
        test_fail(cls.table_spec, "mydb.myschema.mytable.mycol")

        log.info("Testing column_spec")
        test_succeed(cls.column_spec, "mycol")
        test_succeed(cls.column_spec, "forename")
        test_succeed(cls.column_spec, "mytable.mycol")
        test_succeed(cls.column_spec, "t1.a")
        test_succeed(cls.column_spec, "[my silly table].[my silly column]")
        test_succeed(cls.column_spec, "mydb.myschema.mytable.mycol")
        test_succeed(cls.column_spec, "myschema.mytable.mycol")
        test_fail(cls.column_spec, "myschema . mytable . mycol")

        log.info("Testing variable")
        test_succeed(cls.variable, "@myvar")

        log.info("Testing argument_list")
        test_succeed(cls.argument_list, "@myvar, 5")

        log.info("Testing function_call")
        test_succeed(cls.function_call, "myfunc(@myvar, 5)")

        # ---------------------------------------------------------------------
        # Expressions
        # ---------------------------------------------------------------------

        log.info("Testing case_expr")
        test_succeed(cls.case_expr, """
            CASE v
              WHEN 2 THEN x
              WHEN 3 THEN y
              ELSE -99
            END
        """)
# Tail of the recursive s-expression -> code translator `parse` (its head,
# including the function signature and earlier branches, lies above this
# excerpt).  Each branch dispatches on the list's leading symbol.
            # NOTE(review): `names` here vs `env` everywhere else — confirm
            # which environment the let-body should be parsed in.
            [gen_let([[n, parse(v, env)] for (n, v) in sexp[1]])] +
            [parse(p, names) for p in sexp[2:]])
    elif leader == '$' :  # force dollar.
        return gen_dollar(sexp[1])
    elif leader == '!$' :  # force undollar.
        return sexp[1]
    elif leader == '.':
        # Dotted access: parse the receiver; parse list-valued members,
        # pass atoms through unchanged.
        dotleader = sexp[1]
        return gen_dot([parse(dotleader, env)] +
                       [parse(p, env) if type(p) is list else p
                        for p in sexp[2:]])
    # we must prevent
    elif leader == 'if':
        # 4-element form has an else branch; otherwise pad with ''.
        return gen_if([parse(p, env) for p in sexp[1:]]) if len(sexp) == 4 else \
            gen_if([parse(p, env) for p in sexp[1:]] + [''])
    elif leader in unary_ops:
        return gen_unary(leader, parse(sexp[1], env))
    elif leader in binary_ops:
        return gen_binary(leader, parse(sexp[1], env), parse(sexp[2], env))
    elif leader in arbitrary_ops:
        return gen_arbitrary(leader, [parse(p, env) for p in sexp[1:]])
    else:
        # Anything else is an ordinary function call.
        return gen_funcall(leader, [parse(p, env) for p in sexp[1:]])


# Driver: read an s-expression file named on the command line, parse it
# with pyparsing's nestedExpr, and emit the translation of the first form.
with open(sys.argv[1], 'r') as f:
    sexp = f.read()
try:
    data = OneOrMore(nestedExpr()).parseString(sexp)
except:
    # NOTE(review): bare except swallows everything (including
    # KeyboardInterrupt); consider catching ParseException instead.
    print('Syntax error.') ; exit(1)
# NOTE(review): parse() is called with a single argument here, while the
# branches above take an env — confirm the signature has a default env.
print(parse(list(data)[0].asList()))
# BIND9 named.conf 'options' clause grammar fragments (pyparsing).  The '-'
# operator marks everything after the keyword as mandatory (no backtracking).
# The opening of the first statement ('acache-cleaning-interval') lies above
# this excerpt.
                                     seconds_type('acache_cleaning_interval') + semicolon)

options_stmt_acache_enable = (Keyword('acache-enable').suppress() -
                              isc_boolean('acache-enable') + semicolon)
# NOTE(review): the result name 'acache-enable' uses a hyphen, whereas
# 'automatic_interface_scan' below uses underscores — confirm intentional.

options_stmt_answer_cookie = (Keyword('answer-cookie').suppress() -
                              isc_boolean('answer-cookie') + semicolon)

options_stmt_automatic_interface_scan = (
    Keyword('automatic-interface-scan').suppress() -
    isc_boolean('automatic_interface_scan') + semicolon)

# One "port;" entry, and a series of them for the avoid-*-udp-ports lists.
options_ip_port_list = (ip_port + semicolon)
options_ip_port_series = (
    OneOrMore(options_ip_port_list)('options_ip_port_series_OneOrMore'))

options_stmt_avoid_v4_udp_ports = (
    Keyword('avoid-v4-udp-ports').suppress() + lbrack +
    options_ip_port_series('avoid_v4_udp_ports') + rbrack + semicolon)
options_stmt_avoid_v4_udp_ports.setName('avoid-v4-udp-ports { port; ... };')

options_stmt_avoid_v6_udp_ports = (
    Keyword('avoid-v6-udp-ports').suppress() + lbrack +
    options_ip_port_series('avoid_v6_udp_ports') + rbrack + semicolon)
options_stmt_avoid_v6_udp_ports.setName('avoid-v6-udp-ports { port; ... };')

# Wut? was 'bindkey-file', now 'bindkeys-file'???
# bindkey-file <path_name>; [ Opt ]  # v9.5.0 to Feb 2017
options_stmt_bindkeys_file = (Keyword('bindkeys-file').suppress() -
                              quoted_path_name('bindkeys_file') + semicolon)
# Graphviz/DOT attribute-value grammar (pyparsing).  The opening of the
# 'real' definition lies above this excerpt.
    | (decimal_sep + Word(nums)) + Optional(oneOf("E e") + Word(nums))) + Optional(quote).suppress()).setName("real")

# TODO: Positive real number between zero and one.
decimal = real

# String ----------------------------------------------------------------------
q_string = (sglQuotedString | dblQuotedString).setName("q_string")

#double_quoted_string = QuotedString('"', multiline=True,escChar="\\",
#                                    unquoteResults=True)  # dblQuotedString
# Regex form handles escaped quotes/backslashes across lines; removeQuotes
# strips the surrounding quote characters from the result.
double_quoted_string = Regex(r'\"(?:\\\"|\\\\|[^"])*\"', re.MULTILINE)
double_quoted_string.setParseAction(removeQuotes)
# DOT allows "a" + "b" string concatenation; Combine joins the pieces.
quoted_string = Combine(double_quoted_string +
                        Optional(OneOrMore(pluss + double_quoted_string)),
                        adjacent=False)
word = quoted_string.setName("word")  # Word(alphanums)

# Graph attributes ------------------------------------------------------------
# Colors: "#RRGGBB", "#RRGGBBAA", "H S V" triple, or a color name.
hex_color = Word(hexnums, exact=2)  # TODO: Optional whitespace
rgb = Literal("#").suppress() + hex_color.setResultsName("red") + \
    hex_color.setResultsName("green") + hex_color.setResultsName("blue")
rgba = rgb + hex_color.setResultsName("alpha")
hsv = decimal.setResultsName("hue") + decimal.setResultsName("saturation") + \
    decimal.setResultsName("value")
color_name = double_quoted_string | Word(alphas)
colour = rgba | rgb | hsv | color_name

#------------------------------------------------------------------------------