# Atoms of an arithmetic expression: a numeric literal or a reference to an
# integer / miscellaneous variable.
operand = (number | int_variables_ref | misc_variables_ref).setName('r-value')

# Arithmetic over operands.  Levels are listed in the order the precedence
# helper expects them; each entry is (symbol, arity, associativity, action).
rvalue << myOperatorPrecedence(
    operand,
    [
        ('-', 1, opAssoc.RIGHT, Unary.parse_action),
        ('*', 2, opAssoc.LEFT, Binary.parse_action),
        ('-', 2, opAssoc.LEFT, Binary.parse_action),
        ('+', 2, opAssoc.LEFT, Binary.parse_action),
    ])

# Author's stated intent for the registration order below:
#   - BindVariable should take precedence over EqualTo(VariableRef), and
#   - arithmetic should take precedence over BindVariable.
# The variable contracts are registered last
# (presumably after add_contract calls made earlier in the file -- confirm).
add_contract(misc_variables_contract)
add_contract(int_variables_contract)
add_contract(rvalue.copy().setParseAction(EqualTo.parse_action))

# First-match alternation over every registered contract type.
# NOTE(review): this looks like it relies on add_contract having filled
# ParsingTmp.contract_types, so it must stay after the calls above.
hardwired = MatchFirst(ParsingTmp.contract_types).setName(
    'Predefined contract expression')

simple_contract << (hardwired | identifier_contract)
simple_contract.setName('simple contract expression')

any_contract = (composite_contract | simple_contract).setName(
    'Any simple or composite contract')

# Keep the right-hand side of `<<` parenthesised: `<<` binds tighter than
# `|`, so an unparenthesised alternation would be split up.
contract_expression << (any_contract)
# Precedence-grammar builder used for arithmetic r-values.
# NOTE: `myOperatorPrecedence` was kept by the author as a commented-out
# alternative binding for `op`.
op = operatorPrecedence

# Arithmetic over operands.  Levels are listed in the order the precedence
# helper expects them; each entry is (symbol, arity, associativity, action).
rvalue << op(
    operand,
    [
        ('-', 1, opAssoc.RIGHT, Unary.parse_action),
        ('*', 2, opAssoc.LEFT, Binary.parse_action),
        ('-', 2, opAssoc.LEFT, Binary.parse_action),
        ('+', 2, opAssoc.LEFT, Binary.parse_action),
        ('^', 2, opAssoc.LEFT, Binary.parse_action),
    ])

# Author's stated intent for the registration order below:
#   - BindVariable should take precedence over EqualTo(VariableRef), and
#   - arithmetic should take precedence over BindVariable.
# The variable contracts are registered last
# (presumably after add_contract calls made earlier in the file -- confirm).
add_contract(misc_variables_contract)
add_contract(int_variables_contract)
add_contract(rvalue.copy().setParseAction(EqualTo.parse_action))

# First-match alternation over every registered contract type.
# NOTE(review): this looks like it relies on add_contract having filled
# ParsingTmp.contract_types, so it must stay after the calls above.
hardwired = MatchFirst(ParsingTmp.contract_types).setName(
    'Predefined contract expression')

simple_contract << (hardwired | identifier_contract)
simple_contract.setName('simple contract expression')

any_contract = (composite_contract | simple_contract).setName(
    'Any simple or composite contract')

# Keep the right-hand side of `<<` parenthesised: `<<` binds tighter than
# `|`, so an unparenthesised alternation would be split up.
contract_expression << (any_contract)
class WebParser(object):
    """pyparsing grammars that turn scraped text into native Python values.

    Four grammars are built at construction time and stored in
    ``parseTypes`` under the ``PSTYPE_*`` keys:

    * ``PSTYPE_DEFAULT`` -- a dictionary, a list or a single value,
    * ``PSTYPE_DICT``    -- a dictionary only,
    * ``PSTYPE_JS``      -- a statement of the form ``obj.method(<value>);``,
    * ``PSTYPE_RATINGS`` -- a rating keyword from ``Ratings.ratingMaps``
      or ``--``.

    Use ``parse`` to run one of them against a string (or against a file
    when the instance was created with ``fromFile=True``).
    """

    # Spelling of each accepted boolean keyword -> Python bool.
    boolMaps = {"false": False, "true": True, "yes": True, "no": False}

    # Keys of ``parseTypes``; pass one of them as ``parseType`` to ``parse``.
    PSTYPE_DEFAULT = "DEFAULT"
    PSTYPE_JS = "JS"
    PSTYPE_DICT = "dict"
    PSTYPE_RATINGS = "ratings"

    def __init__(self, fromFile=False):
        """Build all grammars.

        fromFile -- when true, ``parse`` hands its input to ``parseFile``
                    instead of ``parseString``.
        """
        self.fromFile = fromFile
        self.parseTypes = {}
        # Order matters: the dict grammar is built from the basic types and
        # the JS grammar is built from the dict grammar's ``topElement``.
        self.__defineBasicTypes()
        self.__defineDictGrammar()
        self.__defineJSGrammar()

    def quoteit(self, v, lq='"', rq=None):
        """Return ``v`` wrapped in suppressed delimiters.

        With ``rq`` given the content is optional (``lq [v] rq``), which
        lets empty brackets match.  Without ``rq`` the same delimiter ``lq``
        is used on both sides and the content is mandatory.
        """
        if rq is not None:
            return Suppress(lq) + Optional(v) + Suppress(rq)
        return Suppress(lq) + v + Suppress(lq)

    def quoteitno(self, v, lq='"', rq=None):
        """Return ``v`` wrapped in suppressed delimiters, content mandatory.

        Same as ``quoteit`` except that ``v`` is never optional.
        """
        if rq is not None:
            return Suppress(lq) + v + Suppress(rq)
        return Suppress(lq) + v + Suppress(lq)

    def datatypeAndQuote(self, v, lq='"', rq=None):
        """Return an expression matching ``v`` either bare or delimited."""
        return MatchFirst([v, self.quoteitno(v, lq, rq)])

    def completeType(self, bt, name="", fn=None, lq='"', rq=None):
        """Return the bare-or-delimited form of ``bt``, named ``name``.

        When ``fn`` is given it is installed as the parse action of the
        resulting expression.
        """
        expr = self.datatypeAndQuote(bt, lq, rq).setName(name)
        if fn is None:
            return expr
        return expr.setParseAction(fn)

    @logtrace
    def __defineBasicTypes(self):
        """Define the scalar grammars: unknown, float, boolean and ratings."""
        self.KDELIM = Suppress(":")

        # A single leading sign, accepted only when digits follow.
        sign = Word("+-", max=1) + FollowedBy(Word(nums))
        # Digit groups separated by commas (the commas are dropped), with
        # an optional decimal part.
        crncy = (Word(nums) + ZeroOrMore(Suppress(",") + Word(nums)) +
                 Optional(Literal(".") + Word(nums)))

        # "?" stands for an unknown value and is reported as NaN.
        baseUnknownValue = Keyword("?")
        self.unknown = self.completeType(baseUnknownValue, "UNKNOWN_VAL",
                                         lambda t: np.nan)

        # The exponent may carry its own sign (e.g. "1e-5"); without the
        # optional [+-] such input was cut off right after the mantissa.
        floatNumberBasic = Combine(
            Optional(sign) +
            Or([Word(nums), crncy,
                Regex(r'[0-9]+(\.\d*)?([eE][+-]?\d+)?')])
        ) + Optional(Suppress("%"))
        self.floatNumber = self.completeType(floatNumberBasic, "float",
                                             lambda t: float(t[0]))

        baseBoolValue = Or([
            CaselessKeyword("false"),
            CaselessKeyword("true"),
            CaselessKeyword("yes"),
            CaselessKeyword("no")
        ])
        # Lower-cased before the lookup, mirroring the ratings action below.
        self.boolean = self.completeType(
            baseBoolValue, "bool",
            lambda t: WebParser.boolMaps[t[0].lower()])

        ratingKeywords = [
            CaselessKeyword(k).setParseAction(
                lambda t: Ratings.ratingMaps[t[0].lower()])
            for k in Ratings.ratingMaps.keys()
        ]
        # "--" is accepted as a rating and reported as NaN.
        ratingKeywords.append(
            Keyword("--").setParseAction(lambda t: np.nan))
        self.ratings = self.completeType(Or(ratingKeywords), "ratings")

        self.parseTypes[WebParser.PSTYPE_RATINGS] = self.ratings

    @logtrace
    def __defineDictGrammar(self):
        """Define the grammar for parsing a string (mainly) into:

        1. Value: a value can be any one of the following
            1. Simple types such as:
                a. numbers: all are floating point
                b. boolean: [true, false], [yes, no]
                c. strings within double quotes
                d. alphanumerics
            2. Dictionary
            3. List
        2. Dictionary: set of key/value pairs. ':' delimits values from
           keys, ',' delimits different pairs and '{}' delimits a
           dictionary.
        3. List: ordered list of values delimited by ','.

        pyparsing parse actions are used to convert the tokens into Python
        native datatypes, such as 'float' for floating point, 'dict' for
        dictionary and 'list' for list.

        The parser supports arbitrary nesting of the above tokens. Both the
        nesting and the datastructure type integrity are preserved in the
        resulting Python representation.

        Application: one of the main uses of the grammar is to scrape web
        pages and extract a combination of JSON and javascript-like HTML
        attributes into Python data structures. Simpler use cases include
        extracting supported simple data types from, say, HTML tables.
        """
        # Forward declarations: values may contain dictionaries and lists,
        # which in turn contain values.
        dictDefn = Forward()
        listDefn = Forward()

        # A key is a quoted string or a bare word, and must be followed by
        # ':' (checked by lookahead, not consumed here).
        key = (QuotedString('"') | Word(alphas)) + FollowedBy(Literal(":"))
        key.setName("key")

        # First match wins, so the order of the alternatives is significant.
        self.value = MatchFirst([
            self.unknown,
            self.floatNumber,
            self.boolean,
            QuotedString('"'),
            Word(alphanums),
            dictDefn,
            listDefn
        ])
        self.value.setName("value")

        # Every key/value pair must be followed by ',' or '}' (lookahead).
        dict_element = (Group(key + self.KDELIM + self.value) +
                        FollowedBy(Or([Literal(","), Literal("}")])))
        lde = Group(Dict(delimitedList(dict_element)))

        # A dictionary is accepted both with and without enclosing braces.
        dictDefn << ((self.quoteit(lde, '{', '}')) | lde)
        self.dictDefn = dictDefn
        self.dictDefn.setName("Dictionary")

        listDefn << self.quoteit(Group(delimitedList(self.value)), '[', ']')
        self.listDefn = listDefn
        self.listDefn.setName("List")

        self.topElement = Or([self.dictDefn, self.listDefn, self.value])

        self.parseTypes[WebParser.PSTYPE_DEFAULT] = self.topElement
        self.parseTypes[WebParser.PSTYPE_DICT] = self.dictDefn

    @logtrace
    def __defineJSGrammar(self):
        """Define the grammar for ``identifier.identifier(<topElement>);``."""
        identifier = Word(alphas + "_", alphanums + "_")
        jsFn = identifier + Suppress(".") + identifier
        jsArgs = Suppress("(") + self.topElement + Suppress(")")
        jsStmt = jsFn + jsArgs + Suppress(";")

        self.jsStmt = jsStmt.setName("JS_Statement")
        self.parseTypes[WebParser.PSTYPE_JS] = self.jsStmt

    @logtrace
    def __parse(self, inputStr, parseType):
        """Run the grammar registered under ``parseType`` on ``inputStr``.

        ``inputStr`` is passed to ``parseFile`` when the instance was built
        with ``fromFile=True`` and to ``parseString`` otherwise.

        Returns the first token for ``PSTYPE_DEFAULT`` and
        ``PSTYPE_RATINGS``; for every other type the complete pyparsing
        result is returned.  An unregistered ``parseType`` raises
        ``KeyError``.
        """
        grammar = self.parseTypes[parseType]
        if self.fromFile:
            parsed = grammar.parseFile(inputStr)
        else:
            parsed = grammar.parseString(inputStr)

        if parseType in (WebParser.PSTYPE_DEFAULT,
                         WebParser.PSTYPE_RATINGS):
            return parsed[0]
        return parsed

    @logtrace
    def parse(self, inputStr, parseType=None):
        """Parse ``inputStr`` with the grammar selected by ``parseType``.

        ``parseType`` is one of the ``PSTYPE_*`` constants and defaults to
        ``PSTYPE_DEFAULT`` when omitted or ``None``.
        """
        if parseType is None:
            parseType = WebParser.PSTYPE_DEFAULT
        return self.__parse(inputStr, parseType)