Пример #1
0
# An r-value operand is any one of three sub-expressions defined elsewhere:
# `number`, `int_variables_ref` or `misc_variables_ref`.
operand = number | int_variables_ref | misc_variables_ref
operand.setName('r-value')


# Bind the arithmetic grammar to `rvalue` (presumably a pyparsing Forward
# declared earlier -- confirm).  Each tuple is
# (operator, arity, associativity, parse action).
# NOTE(review): myOperatorPrecedence is a project helper not visible here; if
# it follows pyparsing's operatorPrecedence, rows are listed from
# tightest-binding to loosest, which would make binary '-' bind tighter than
# '+' -- confirm that this is intended.
rvalue << myOperatorPrecedence(operand, [
             ('-', 1, opAssoc.RIGHT, Unary.parse_action),
             ('*', 2, opAssoc.LEFT, Binary.parse_action),
             ('-', 2, opAssoc.LEFT, Binary.parse_action),
             ('+', 2, opAssoc.LEFT, Binary.parse_action),
          ])

# Registration order is significant.  The author's stated goals are:
# - BindVariable should take precedence over EqualTo(VariableRef),
# - but arithmetic should take precedence over BindVariable;
# - variables come last.
# NOTE(review): presumably add_contract appends to ParsingTmp.contract_types,
# which MatchFirst below tries in order -- verify against add_contract.
add_contract(misc_variables_contract)
add_contract(int_variables_contract)
# A bare r-value used as a contract is turned into an EqualTo check.
add_contract(rvalue.copy().setParseAction(EqualTo.parse_action))

# All registered ("hardwired") contract types, tried first-match-wins.
hardwired = MatchFirst(ParsingTmp.contract_types)
hardwired.setName('Predefined contract expression')

# `<<` binds tighter than `|` in Python, so the alternation on the right-hand
# side must be parenthesized.
simple_contract << (hardwired | identifier_contract)
simple_contract.setName('simple contract expression')

# Composite contracts are tried before simple ones.
any_contract = composite_contract | simple_contract
any_contract.setName('Any simple or composite contract')
# Always parenthesize the right-hand side of `<<` (see the precedence note
# above); the parentheses are redundant for a single name but kept as a habit.
contract_expression << (any_contract)

Пример #2
0
# Select the operator-precedence builder: `operatorPrecedence` (presumably
# pyparsing's -- confirm) is active; the project-specific variant is kept below
# as a commented-out alternative.
op = operatorPrecedence
# op  = myOperatorPrecedence
# Bind the arithmetic grammar to `rvalue`.  Each tuple is
# (operator, arity, associativity, parse action).  With pyparsing's
# operatorPrecedence, rows are listed from tightest-binding to loosest, so
# unary '-' binds tightest and '^' loosest.
rvalue << op(operand, [
    ('-', 1, opAssoc.RIGHT, Unary.parse_action),
    ('*', 2, opAssoc.LEFT, Binary.parse_action),
    ('-', 2, opAssoc.LEFT, Binary.parse_action),
    ('+', 2, opAssoc.LEFT, Binary.parse_action),
    ('^', 2, opAssoc.LEFT, Binary.parse_action),
])

# Registration order is significant.  The author's stated goals are:
# - BindVariable should take precedence over EqualTo(VariableRef),
# - but arithmetic should take precedence over BindVariable;
# - variables come last.
# NOTE(review): presumably add_contract appends to ParsingTmp.contract_types,
# which MatchFirst below tries in order -- verify against add_contract.
add_contract(misc_variables_contract)
add_contract(int_variables_contract)

# A bare r-value used as a contract is turned into an EqualTo check.
add_contract(rvalue.copy().setParseAction(EqualTo.parse_action))

# All registered ("hardwired") contract types, tried first-match-wins.
hardwired = MatchFirst(ParsingTmp.contract_types)
hardwired.setName('Predefined contract expression')

# `<<` binds tighter than `|` in Python, so the alternation on the right-hand
# side must be parenthesized.
simple_contract << (hardwired | identifier_contract)
simple_contract.setName('simple contract expression')

# Composite contracts are tried before simple ones.
any_contract = composite_contract | simple_contract
any_contract.setName('Any simple or composite contract')
# Always parenthesize the right-hand side of `<<` (see the precedence note
# above); the parentheses are redundant for a single name but kept as a habit.
contract_expression << (any_contract)
Пример #3
0
class WebParser(object):
    """pyparsing-based parser for JSON-like values found in web pages.

    Grammars are built once in ``__init__`` and stored in ``parseTypes``,
    keyed by the ``PSTYPE_*`` constants.  Use :meth:`parse` to run one of
    them against a string, or against a file when ``fromFile`` is true.
    """

    # Lower-case boolean spellings and the Python value each one maps to.
    boolMaps = {"false": False, "true": True, "yes": True, "no": False}

    # Keys of ``parseTypes``; pass one of these as ``parseType`` to parse().
    PSTYPE_DEFAULT = "DEFAULT"
    PSTYPE_JS = "JS"
    PSTYPE_DICT = "dict"
    PSTYPE_RATINGS = "ratings"

    def __init__(self, fromFile=False):
        """Build all grammars.

        :param fromFile: when true, the input given to :meth:`parse` is
            handed to pyparsing's ``parseFile`` instead of ``parseString``.
        """
        self.fromFile = fromFile
        self.parseTypes = {}
        # Order matters: the dict grammar uses the basic types, and the JS
        # grammar uses ``topElement`` defined by the dict grammar.
        self.__defineBasicTypes()
        self.__defineDictGrammar()
        self.__defineJSGrammar()

    def quoteit(self, v, lq='"', rq=None):
        """Wrap ``v`` in suppressed delimiters.

        With an explicit right delimiter ``rq`` the content is optional
        (so ``{}`` / ``[]`` match); otherwise ``lq`` is used on both sides
        and the content is required.
        """
        if rq is not None:
            return Suppress(lq) + Optional(v) + Suppress(rq)
        return Suppress(lq) + v + Suppress(lq)

    def quoteitno(self, v, lq='"', rq=None):
        """Like :meth:`quoteit`, but the content is never optional."""
        closing = rq if rq is not None else lq
        return Suppress(lq) + v + Suppress(closing)

    def datatypeAndQuote(self, v, lq='"', rq=None):
        """Match ``v`` either bare or enclosed in (suppressed) quotes."""
        return MatchFirst([v, self.quoteitno(v, lq, rq)])

    def completeType(self, bt, name="", fn=None, lq='"', rq=None):
        """Build a named, optionally quoted type from base expression ``bt``.

        :param bt: base pyparsing expression.
        :param name: name given to the resulting expression.
        :param fn: optional parse action converting the matched tokens.
        :param lq: left quote; also the right quote when ``rq`` is None.
        :param rq: optional right quote.
        :return: the resulting pyparsing expression.
        """
        expr = self.datatypeAndQuote(bt, lq, rq).setName(name)
        if fn is not None:
            expr.setParseAction(fn)
        return expr

    @logtrace
    def __defineBasicTypes(self):
        """Define the scalar grammars: unknown, float, boolean and ratings."""
        self.KDELIM = Suppress(":")
        # A single sign character, only when digits follow it.
        sign = Word("+-", max=1) + FollowedBy(Word(nums))
        # Digits with thousands separators; the commas are dropped.
        crncy = Word(nums) + ZeroOrMore(Suppress(",") + Word(nums)) + \
                Optional(Literal(".") + Word(nums))
        baseUnknownValue = Keyword("?")
        # "?" stands for a missing value and is converted to NaN.
        self.unknown = self.completeType(baseUnknownValue, "UNKNOWN_VAL",
                                         lambda t: np.nan)

        # ``Or`` keeps the longest alternative.  The exponent accepts an
        # optional sign so that "1e-5" is consumed whole instead of stopping
        # after "1".  A trailing "%" is dropped; the value is not rescaled.
        floatNumberBasic = Combine(Optional(sign) + \
                                   Or([Word(nums),
                                       crncy,
                                       Regex(r'[0-9]+(\.\d*)?([eE][+-]?\d+)?')])) + \
                           Optional(Suppress("%"))
        self.floatNumber = self.completeType(floatNumberBasic, "float",
                                             lambda t: float(t[0]))

        baseBoolValue = Or([
            CaselessKeyword("false"),
            CaselessKeyword("true"),
            CaselessKeyword("yes"),
            CaselessKeyword("no")
        ])
        # Lower-cased defensively (as the ratings action below does) so the
        # lookup never depends on the case of the returned token.
        self.boolean = self.completeType(
            baseBoolValue, "bool",
            lambda t: WebParser.boolMaps[t[0].lower()])

        # One caseless keyword per key of Ratings.ratingMaps, each replaced
        # by the value that key maps to.
        ratingKeywords = [CaselessKeyword(k).setParseAction( \
            lambda t: Ratings.ratingMaps[t[0].lower()]) \
                          for k in Ratings.ratingMaps.keys()]
        # "--" is accepted as a rating and converted to NaN.
        ratingKeywords.append(Keyword("--").setParseAction(lambda t: np.nan))
        self.ratings = self.completeType(Or(ratingKeywords), "ratings")
        self.parseTypes[WebParser.PSTYPE_RATINGS] = self.ratings

    @logtrace
    def __defineDictGrammar(self):
        """Define the grammar for parsing a string (mainly) into:

        1. Value: any one of the following
            1. Simple types such as:
                a. numbers: all are floating point
                b. boolean: [true, false], [yes, no]
                c. strings within double quotes
                d. alphanumerics
            2. Dictionary
            3. List
        2. Dictionary: set of key/value pairs. ':' delimits values from
           keys, ',' delimits different pairs, '{}' delimits a dictionary.
        3. List: ordered list of values delimited by ','.

        pyparsing parse actions are used to convert the tokens into Python
        native datatypes such as 'float' for floating point, 'dict' for
        dictionary and 'list' for list. The parser supports arbitrary
        nesting of the above tokens. Both the nesting and the data
        structure type integrity are preserved in the resulting Python
        representation.

        Application:
        One of the main uses of the grammar is to scrape web pages and
        extract a combination of JSON and javascript-like HTML attributes
        into Python data structures. Simpler use cases include extracting
        supported simple data types from, say, HTML tables.
        """
        # Forward declarations allow dictionaries and lists to nest.
        dictDefn = Forward()
        listDefn = Forward()
        # A key is a quoted string or a bare word, and must be followed by ':'.
        key = (QuotedString('"') | Word(alphas)) + FollowedBy(Literal(":"))
        key.setName("key")
        # First match wins, so typed scalars are tried before plain strings.
        self.value = MatchFirst([
            self.unknown, self.floatNumber, self.boolean,
            QuotedString('"'),
            Word(alphanums), dictDefn, listDefn
        ])
        self.value.setName("value")
        # Each key/value pair must be followed by ',' or '}' (lookahead only).
        # NOTE(review): this lookahead also applies to the brace-less
        # alternative of dictDefn below, so a brace-less dictionary whose
        # last pair ends the input will not match -- confirm this is intended.
        dict_element = Group(key + self.KDELIM + self.value) + \
                       FollowedBy(Or([Literal(","), Literal("}")]))
        lde = Group(Dict(delimitedList(dict_element)))
        dictDefn << ((self.quoteit(lde, '{', '}')) | lde)
        self.dictDefn = dictDefn
        self.dictDefn.setName("Dictionary")
        listDefn << self.quoteit(Group(delimitedList(self.value)), '[', ']')
        self.listDefn = listDefn
        self.listDefn.setName("List")
        # ``Or`` picks whichever alternative consumes the most input.
        self.topElement = Or([self.dictDefn, self.listDefn, self.value])
        self.parseTypes[WebParser.PSTYPE_DEFAULT] = self.topElement
        self.parseTypes[WebParser.PSTYPE_DICT] = self.dictDefn

    @logtrace
    def __defineJSGrammar(self):
        """Define ``identifier.identifier(<topElement>);`` statements."""
        identifier = Word(alphas + "_", alphanums + "_")
        jsFn = identifier + Suppress(".") + identifier
        jsArgs = Suppress("(") + self.topElement + Suppress(")")
        jsStmt = jsFn + jsArgs + Suppress(";")
        self.jsStmt = jsStmt.setName("JS_Statement")
        self.parseTypes[WebParser.PSTYPE_JS] = self.jsStmt

    @logtrace
    def __parse(self, inputStr, parseType):
        """Run the grammar registered under ``parseType`` on ``inputStr``.

        :return: the first token for PSTYPE_DEFAULT and PSTYPE_RATINGS,
            otherwise the complete pyparsing result.
        :raises KeyError: if ``parseType`` is not a registered grammar.
        """
        grammar = self.parseTypes[parseType]
        if self.fromFile:
            parsed = grammar.parseFile(inputStr)
        else:
            parsed = grammar.parseString(inputStr)
        if parseType in (WebParser.PSTYPE_DEFAULT, WebParser.PSTYPE_RATINGS):
            return parsed[0]
        return parsed

    @logtrace
    def parse(self, inputStr, parseType=None):
        """Parse ``inputStr`` with the grammar selected by ``parseType``.

        :param inputStr: text to parse, or the argument for pyparsing's
            ``parseFile`` when the parser was created with ``fromFile=True``.
        :param parseType: one of the ``PSTYPE_*`` constants; ``None``
            selects ``PSTYPE_DEFAULT``.
        :return: see :meth:`__parse`.
        """
        if parseType is None:
            parseType = WebParser.PSTYPE_DEFAULT
        return self.__parse(inputStr, parseType)