Пример #1
0
def _getSQLScriptGrammar():
    """Build the pyparsing element that cuts an SQL script into commands.

    A statement is a run of atoms closed either by a semicolon (plus any
    directly following newlines) or by the end of the input.  Atoms are
    comments (``--`` to end of line, ``/* ... */``; both suppressed), plain
    text, single-quoted strings, double-quoted identifiers, dollar-quoted
    strings, and lone dollar signs.

    Parsing yields the list of statements, each being its atoms joined
    with single blanks; statements consisting of whitespace only are
    dropped, so empty statements in the script are tolerated.
    """

    def _joinAtoms(s, p, toks):
        # one statement = its atoms, glued together with blanks
        return " ".join(toks)

    def _dropBlankStatements(s, p, toks):
        kept = []
        for stmt in toks.asList():
            if str(stmt).strip():
                kept.append(stmt)
        return kept

    with utils.pyparsingWhitechars(" \t"):
        piece = Forward()
        piece.setName("Atom")

        newline = Literal("\n")
        dashComment = Literal("--") + SkipTo("\n", include=True)
        blockComment = Literal("/*") + SkipTo("*/", include=True)
        anyComment = dashComment | blockComment

        sqlString = QuotedString(quoteChar="'",
                                 escChar="\\",
                                 multiline=True,
                                 unquoteResults=False)
        delimitedId = QuotedString(quoteChar='"',
                                   escChar="\\",
                                   unquoteResults=False)
        dollarString = Regex(r"(?s)\$(\w*)\$.*?\$\1\$")
        dollarString.setName("dollarQuoted")
        # A delimited identifier is not really a string literal, but it
        # has to be skipped over in just the same way, so it lives here.
        quoted = sqlString | dollarString | delimitedId
        quoted.setName("strLiteral")

        plainText = Regex("[^;'\"$]+")
        plainText.setName("other")

        loneDollar = Literal("$") + ~Literal("$")
        terminator = (Literal(";") + ZeroOrMore(newline) | StringEnd())

        piece << (Suppress(anyComment) | plainText | quoted | loneDollar)

        command = OneOrMore(piece) + Suppress(terminator)
        command.setName("statement")
        command.setParseAction(_joinAtoms)

        commands = OneOrMore(command) + StringEnd()
        commands.setName("script")
        commands.setParseAction(_dropBlankStatements)

        # flip this by hand when the grammar needs to be traced
        traceParsing = False
        if traceParsing:
            for sym in (piece, anyComment, plainText, quoted, command,
                        terminator, dollarString, loneDollar):
                sym.setDebug(True)

        return commands
Пример #2
0
def getComplexGrammar(baseLiteral,
                      pmBuilder,
                      errorLiteral=None,
                      nodeClass=NumericNode):
    """returns the root element of a grammar parsing numeric vizier-like
    expressions.

    This is used for both dates and floats; use baseLiteral to match the
    operand terminal.  The trouble with dates is that the +/- operator
    has a simple float as the second operand, and that's why you can
    pass in an errorLiteral and a pmBuilder.

    Arguments:

    * baseLiteral -- pyparsing element matching a single operand.
    * pmBuilder -- parse action installed on the "value +/- error"
      production.
    * errorLiteral -- pyparsing element matching the operand following
      the +/- operator; defaults to baseLiteral.
    * nodeClass -- class handed to the node factories that build the
      parse results.

    The returned element requires the expression to extend to the end of
    the input.
    """
    if errorLiteral is None:
        errorLiteral = baseLiteral

    with utils.pyparsingWhitechars(" \t"):
        preOp = Literal("=") | Literal(">=") | Literal(">") | Literal(
            "<=") | Literal("<")
        rangeOp = Literal("..")
        # U+00B1 is the plus-minus sign.  It is written as a unicode
        # literal rather than decoded from a byte string at run time, so
        # the expression is also valid where str has no decode method.
        pmOp = Literal("+/-") | Literal(u"\xb1")
        orOp = Literal("|")
        andOp = Literal("&")
        notOp = Literal("!")
        commaOp = Literal(",")

        preopExpr = Optional(preOp) + baseLiteral
        rangeExpr = baseLiteral + Suppress(rangeOp) + baseLiteral
        valList = baseLiteral + OneOrMore(Suppress(commaOp) + baseLiteral)
        pmExpr = baseLiteral + Suppress(pmOp) + errorLiteral
        # preopExpr must come last: it matches a bare operand, which is
        # also the start of each of the other alternatives.
        simpleExpr = rangeExpr | pmExpr | valList | preopExpr

        expr = Forward()

        notExpr = Optional(notOp) + simpleExpr
        andExpr = notExpr + ZeroOrMore(Suppress(andOp) + notExpr)
        orExpr = andExpr + ZeroOrMore(Suppress(orOp) + expr)
        expr << orExpr
        exprInString = expr + StringEnd()

        rangeExpr.setName("rangeEx")
        rangeOp.setName("rangeOp")
        notExpr.setName("notEx")
        andExpr.setName("andEx")
        andOp.setName("&")
        orExpr.setName("orEx")
        expr.setName("expr")
        simpleExpr.setName("simpleEx")

        preopExpr.addParseAction(_simpleExprFactory(nodeClass))
        rangeExpr.addParseAction(_getNodeFactory("..", nodeClass))
        pmExpr.addParseAction(pmBuilder)
        valList.addParseAction(_getNodeFactory(",", nodeClass))
        notExpr.addParseAction(_makeNotNodeFactory(nodeClass))
        andExpr.addParseAction(_getBinopFactory("AND", nodeClass))
        orExpr.addParseAction(_getBinopFactory("OR", nodeClass))

        return exprInString
Пример #3
0
def getXMLGrammar(manipulator):
    """returns a dictionary of pyparsing symbols for a simple XML grammar.

    manipulator must have _feedElement and _openElement methods; they are
    installed as parse actions on the element and tagOpener symbols,
    respectively.

    The dictionary maps the names of the local symbols defined here (name,
    value, attribute, element, document, ...) to their parser elements.
    All of them have had leaveWhitespace() called on them, so whitespace
    in the input is only consumed where the grammar says so.
    """
    with utils.pyparsingWhitechars("\r"):
        name = Word(alphas + "_:", alphanums + ".:_-")
        opener = Literal("<")
        closer = Literal(">")
        value = (
            QuotedString(quoteChar="'", multiline=True, unquoteResults=False)
            | QuotedString(quoteChar='"', multiline=True,
                           unquoteResults=False))
        attribute = (name + Literal("=") + value)
        tagOpener = (opener + name + ZeroOrMore(White() + attribute) +
                     Optional(White()))

        openingTag = (tagOpener + closer)
        closingTag = (opener + Literal("/") + name + Optional(White()) +
                      closer)
        emptyTag = (tagOpener + Optional(White()) + Literal("/>"))

        # include is a boolean flag: the terminating marker is consumed
        # together with the skipped text.
        processingInstruction = (opener + Literal("?") +
                                 SkipTo("?>", include=True))
        comment = (opener + Literal("!--") + SkipTo("-->", include=True))
        cdataSection = (opener + Literal("![CDATA[") +
                        SkipTo("]]>", include=True))

        nonTagStuff = CharsNotIn("<", min=1)

        docItem = Forward()
        element = ((openingTag + ZeroOrMore(docItem) + closingTag) | emptyTag)
        docItem << (element
                    | processingInstruction
                    | comment
                    | cdataSection
                    | nonTagStuff)

        document = (ZeroOrMore(Optional(White()) + docItem) +
                    Optional(White()) + StringEnd())
        document.parseWithTabs()

        element.addParseAction(manipulator._feedElement)
        tagOpener.addParseAction(manipulator._openElement)
        attribute.addParseAction(lambda s, p, t: [Attribute(t)])
        openingTag.addParseAction(_nodify)
        closingTag.addParseAction(_nodify)
        emptyTag.addParseAction(_nodify)

        # Everything still in the local namespace below is a grammar
        # symbol and ends up in the returned dictionary; manipulator is
        # not one, so it is removed first.
        del manipulator
        # Iterate over a snapshot so that binding the loop variable cannot
        # interfere with the namespace being walked.
        for el in list(locals().values()):
            # this *really* shouldn't be necessary
            el.leaveWhitespace()
        del el

        return locals()
Пример #4
0
def getUploadGrammar():
    """Build the grammar for upload specifications.

    The input is one or more ``name,uri`` items separated by semicolons
    and must be matched up to its end.  Each item is turned into a
    (name, uri) pair by a parse action.
    """
    from gavo.imp.pyparsing import (Word, ZeroOrMore, Suppress, StringEnd,
                                    alphas, alphanums, CharsNotIn)

    def _toPair(s, p, t):
        return (t["name"], t["uri"])

    with utils.pyparsingWhitechars(" \t"):
        # Open question: should more kinds of table names be allowed?
        identifier = Word(alphas + "_", alphanums + "_")
        # Open question: what should be allowed/forbidden in URIs?
        location = CharsNotIn(" ;,")

        oneUpload = identifier("name") + "," + location("uri")
        furtherUploads = ZeroOrMore(Suppress(";") + oneUpload)
        allUploads = oneUpload + furtherUploads + StringEnd()

        oneUpload.addParseAction(_toPair)
        return allUploads
Пример #5
0
def _getModelGrammar():
    """Build the grammar for comma-separated lists of meta assertions.

    An assertion is a meta key (letters and dots), optionally followed by
    a parenthesized modifier that is empty, ``!`` or ``1``.  Each assertion
    is built by looking up the tuple of modifier tokens in
    _assertionCodes and calling the result with the key; the complete
    parse result is wrapped into a MetaValidator.
    """
    from gavo.imp.pyparsing import (Literal, Optional, StringEnd, Suppress,
                                    Word, ZeroOrMore, alphas)

    def _buildAssertion(s, p, toks):
        metaName = str(toks["key"])
        # no modifier at all maps to the empty tuple
        modTokens = tuple(toks.get("mod", ()))
        makeAssertion = _assertionCodes[modTokens]
        return makeAssertion(metaName)

    def _buildValidator(s, p, toks):
        return MetaValidator(toks)

    with utils.pyparsingWhitechars(" \t"):
        keyWord = Word(alphas + ".")
        modSymbol = Literal('!') | '1'
        modSpec = Suppress('(') + Optional(modSymbol) + Suppress(')')
        oneAssertion = keyWord("key") + Optional(modSpec)("mod")
        moreAssertions = ZeroOrMore(Suppress(',') + oneAssertion)
        fullModel = oneAssertion + moreAssertions + StringEnd()

    oneAssertion.addParseAction(_buildAssertion)
    fullModel.addParseAction(_buildValidator)
    return fullModel
Пример #6
0
def getStringGrammar():
    """Build the grammar for vizier-like string expressions.

    The returned element matches exactly one expression followed by the
    end of the input.  An expression is, tried in this order, an
    enumeration (``=,``, ``!=,`` or ``=|`` followed by separated operands),
    a comparison operator with an operand, a pattern operator with a
    wildcard pattern, or a naked operand, which is treated as an ``==``
    comparison.
    """
    # XXX TODO: should we cut at =| (which is currently parsed as = |)?
    with utils.pyparsingWhitechars(" \t"):

        def _enumOf(opener, separator, operand):
            # opener, then one or more operands split by separator
            return Literal(opener) + operand + ZeroOrMore(
                Suppress(separator) + operand)

        # XXX probably a bug in pyparsing: matching blanks explicitly
        # shouldn't be necessary here
        blanks = Word(" \t")

        # comparisons
        compareOp = (Literal("==") | Literal("!=") | Literal(">=")
                     | Literal(">") | Literal("<=") | Literal("<")
                     | Literal("=~") | Literal("=,"))
        rest = Regex(r"[^\s].*|")
        compareExpr = compareOp + Optional(blanks) + rest

        # enumerations
        commaItem = Regex("[^,]+")
        barItem = Regex("[^|]+")
        includeList = _enumOf("=,", ",", commaItem)
        excludeList = _enumOf("!=,", ",", commaItem)
        barList = _enumOf("=|", "|", barItem)
        listExpr = excludeList | includeList | barList

        # wildcard patterns
        literalRun = CharsNotIn("[*?")
        star = Literal("*")
        qmark = Literal("?")
        setBody = CharsNotIn("]")
        charSet = Suppress("[") + setBody + Suppress("]")
        globPattern = OneOrMore(charSet | star | qmark | literalRun)

        matchOp = (Literal("~") | Literal("=") | Literal("!~")
                   | Literal("!"))
        matchExpr = matchOp + Optional(blanks) + globPattern

        # operands without any operator
        bareExpr = Regex("[^=!~|><]") + Optional(rest)

        anyExpr = listExpr | compareExpr | matchExpr | bareExpr
        wholeInput = anyExpr + StringEnd()

        for sym, label in ((anyExpr, "StringExpr"),
                           (listExpr, "EnumExpr"),
                           (rest, "Operand"),
                           (compareOp, "Operator"),
                           (bareExpr, "SingleOperand")):
            sym.setName(label)

        debug = False
        for sym in (anyExpr, listExpr, literalRun, rest, compareOp,
                    bareExpr):
            sym.setDebug(debug)

        for sym in (compareExpr, matchExpr, listExpr):
            sym.addParseAction(_makeOpNode)

        makeDefaultExpr = _getNodeFactory("==", StringNode)

        def _makeNakedNode(s, p, toks):
            # the first character and the remainder arrive as separate
            # tokens; glue them back into one operand
            return makeDefaultExpr(s, p, ["".join(toks)])

        bareExpr.addParseAction(_makeNakedNode)
        star.addParseAction(_makeOpNode)
        qmark.addParseAction(_makeOpNode)
        setBody.addParseAction(_getNodeFactory("[", StringNode))

        return wholeInput
Пример #7
0
def _getSTCSGrammar(numberLiteral,
                    timeLiteral,
                    _exportAll=False,
                    _addGeoReferences=False):
    """returns a dictionary of symbols for a grammar parsing STC-S into
    a concrete syntax tree.

    numberLiteral and timeLiteral are pyparsing symbols for numbers and
    datetimes, respectively.

    _exportAll is handed through to _makeSymDict together with the local
    namespace; presumably it controls whether symbols with a leading
    underscore are included in the result (confirm in _makeSymDict).

    _addGeoReferences lets you write references to vectors in place of
    coordinates and positions.  As written, the pattern below matches a
    name in square brackets (like Circle [center] 20.) and turns it into a
    common.GeometryColRef.

    The symbols are returned by name via locals(), so the names of the
    local variables in here are part of the interface; the top-level
    symbol is stcsPhrase.
    """
    with utils.pyparsingWhitechars("\n\t\r "):

        number = numberLiteral
        # keep the parameter name out of the exported namespace
        del numberLiteral

        # units
        # The "-" operator (rather than "+") makes pyparsing stop
        # backtracking once the unit keyword has matched, so a bad unit
        # is reported right there.
        _unitOpener = Suppress(CaselessKeyword("unit"))
        _spaceUnitWord = Regex(_reFromKeys(spatialUnits))
        _timeUnitWord = Regex(_reFromKeys(temporalUnits))
        spaceUnit = _unitOpener - OneOrMore(_spaceUnitWord).addParseAction(
            _stringifyBlank)("unit")
        timeUnit = _unitOpener - _timeUnitWord("unit")
        spectralUnit = _unitOpener - Regex(_reFromKeys(spectralUnits))("unit")
        redshiftUnit = _unitOpener - (
            (_spaceUnitWord + "/" + _timeUnitWord).addParseAction(_stringify)
            | CaselessKeyword("nil"))("unit")
        velocityUnit = _unitOpener - (OneOrMore(
            (_spaceUnitWord + "/" + _timeUnitWord).addParseAction(
                _stringify)).addParseAction(_stringifyBlank))("unit")

        # basic productions common to most STC-S subphrases
        astroYear = Regex("[BJ][0-9]+([.][0-9]*)?")
        fillfactor = (Suppress(CaselessKeyword("fillfactor")) +
                      number("fillfactor"))
        noEqFrame = (CaselessKeyword("J2000")
                     | CaselessKeyword("B1950")
                     | CaselessKeyword("ICRS")
                     | CaselessKeyword("GALACTIC")
                     | CaselessKeyword("GALACTIC_I")
                     | CaselessKeyword("GALACTIC_II")
                     | CaselessKeyword("SUPER_GALACTIC")
                     | CaselessKeyword("GEO_C")
                     | CaselessKeyword("GEO_D")
                     | CaselessKeyword("HPR")
                     | CaselessKeyword("HGS")
                     | CaselessKeyword("HGC")
                     | CaselessKeyword("HPC")
                     | CaselessKeyword("UNKNOWNFrame"))("frame")
        # only these frames may be followed by an equinox
        eqFrameName = (CaselessKeyword("FK5")
                       | CaselessKeyword("FK4")
                       | CaselessKeyword("ECLIPTIC"))("frame")
        eqFrame = eqFrameName + Optional(astroYear("equinox"))
        frame = eqFrame | noEqFrame
        plEphemeris = CaselessKeyword("JPL-DE200") | CaselessKeyword(
            "JPL-DE405")
        refpos = ((Regex(_reFromKeys(common.stcRefPositions)))("refpos") +
                  Optional(plEphemeris("plEphemeris")))
        flavor = (Regex(_reFromKeys(stcsFlavors)))("flavor")

        # properties of coordinates
        error = Suppress(CaselessKeyword("Error")) + OneOrMore(number)
        resolution = Suppress(
            CaselessKeyword("Resolution")) + OneOrMore(number)
        size = Suppress(CaselessKeyword("Size")) + OneOrMore(number)
        pixSize = Suppress(CaselessKeyword("PixSize")) + OneOrMore(number)
        cooProps = (Optional(error("error")) +
                    Optional(resolution("resolution")) +
                    Optional(size("size")) + Optional(pixSize("pixSize")))

        # properties of most spatial specs
        _coos = ZeroOrMore(number)("coos")
        _pos = Optional(ZeroOrMore(number)("pos"))
        if _addGeoReferences:  # include references to vectors, for getColrefSymbols
            # a bracketed identifier; the parse action strips the brackets
            complexColRef = Regex(
                '[[][A-Za-z_][A-Za-z_0-9]*[]]').addParseAction(
                    lambda s, p, toks: common.GeometryColRef(toks[0][1:-1]))
            _coos = complexColRef("coos") | _coos
            _pos = complexColRef("pos") | _pos
        positionSpec = Suppress(CaselessKeyword("Position")) + _pos
        epochSpec = Suppress(CaselessKeyword("Epoch")) - astroYear
        _spatialProps = Optional(spaceUnit) + cooProps
        velocitySpec = (CaselessKeyword("Velocity")("type") +
                        OneOrMore(number)("pos"))
        velocityInterval = (Optional(
            CaselessKeyword("VelocityInterval")("type") +
            Optional(fillfactor) + _coos) + Optional(velocitySpec) +
                            Optional(velocityUnit) +
                            cooProps).addParseAction(makeTree)
        _spatialTail = (_spatialProps + Optional(velocityInterval)("velocity"))
        _regionTail = Optional(positionSpec) + _spatialTail
        _commonSpaceItems = (
            frame + Optional(refpos) + Optional(flavor) +
            Optional(epochSpec("epoch").addParseAction(_stringify)))
        _commonRegionItems = Optional(fillfactor) + _commonSpaceItems

        # times and time intervals
        timescale = (Regex("|".join(common.stcTimeScales)))("timescale")
        timephrase = Suppress(CaselessKeyword("Time")) + timeLiteral
        _commonTimeItems = Optional(timeUnit) + cooProps
        _intervalOpener = (Optional(fillfactor) +
                           Optional(timescale("timescale")) + Optional(refpos))
        _intervalCloser = Optional(timephrase("pos")) + _commonTimeItems

        # The second argument to setResultsName is listAllMatches, so
        # single time literals are still collected into a list.
        timeInterval = (CaselessKeyword("TimeInterval")("type") +
                        _intervalOpener + ZeroOrMore(timeLiteral)("coos") +
                        _intervalCloser)
        startTime = (CaselessKeyword("StartTime")("type") + _intervalOpener +
                     timeLiteral.setResultsName("coos", True) +
                     _intervalCloser)
        stopTime = (CaselessKeyword("StopTime")("type") + _intervalOpener +
                    timeLiteral.setResultsName("coos", True) + _intervalCloser)
        time = (CaselessKeyword("Time")("type") +
                Optional(timescale("timescale")) + Optional(refpos) +
                Optional(timeLiteral.setResultsName("pos", True)) +
                _commonTimeItems)
        timeSubPhrase = (timeInterval
                         | startTime
                         | stopTime
                         | time).addParseAction(makeTree)

        # atomic "geometries"; I do not bother to specify their actual
        # arguments since, without knowing the frame, they may be basically
        # anything.   Also, I want to allow geometry column references.
        _atomicGeometryKey = (CaselessKeyword("AllSky").setName("sub-geometry")
                              | CaselessKeyword("Circle")
                              | CaselessKeyword("Ellipse")
                              | CaselessKeyword("Box")
                              | CaselessKeyword("Polygon")
                              | CaselessKeyword("Convex")
                              | CaselessKeyword("PositionInterval"))
        atomicGeometry = (_atomicGeometryKey("type") + _commonRegionItems +
                          _coos + _regionTail)

        # compound "geometries"
        # Operands nest, hence the forward declaration; each operand is
        # converted to a plain dict by its parse action.
        _compoundGeoExpression = Forward()
        _compoundGeoOperand = (
            (_atomicGeometryKey("subtype") + _coos)
            | _compoundGeoExpression).addParseAction(lambda s, p, t: dict(t))

        # exactly one operand
        _compoundGeoOperatorUnary = CaselessKeyword("Not")
        _compoundGeoOperandsUnary = (Suppress('(') + _compoundGeoOperand +
                                     Suppress(')'))
        _compoundGeoExprUnary = (_compoundGeoOperatorUnary("subtype") +
                                 _compoundGeoOperandsUnary("children"))

        # exactly two operands
        _compoundGeoOperatorBinary = CaselessKeyword("Difference")
        _compoundGeoOperandsBinary = (Suppress('(') + _compoundGeoOperand +
                                      _compoundGeoOperand + Suppress(')'))
        _compoundGeoExprBinary = (_compoundGeoOperatorBinary("subtype") +
                                  _compoundGeoOperandsBinary("children"))

        # two or more operands
        _compoundGeoOperatorNary = (CaselessKeyword("Union")
                                    | CaselessKeyword("Intersection"))
        _compoundGeoOperandsNary = (Suppress('(') + _compoundGeoOperand +
                                    _compoundGeoOperand +
                                    ZeroOrMore(_compoundGeoOperand) +
                                    Suppress(')'))
        _compoundGeoExprNary = (_compoundGeoOperatorNary("subtype") +
                                _compoundGeoOperandsNary("children"))

        _compoundGeoExpression << (_compoundGeoExprUnary
                                   | _compoundGeoExprBinary
                                   | _compoundGeoExprNary)
        # At the top level, the operator is stored as "type" (nested
        # expressions use "subtype") and the frame items go between the
        # operator and its operands.
        compoundGeoPhrase = (
            _compoundGeoOperatorUnary("type") + _commonRegionItems +
            _compoundGeoOperandsUnary("children") + _regionTail
            | _compoundGeoOperatorBinary("type") + _commonRegionItems +
            _compoundGeoOperandsBinary("children") + _regionTail
            | _compoundGeoOperatorNary("type") + _commonRegionItems -
            _compoundGeoOperandsNary("children") + _regionTail)

        # space subphrase
        positionInterval = (CaselessKeyword("PositionInterval")("type") +
                            _commonRegionItems + _coos + _regionTail)
        position = (CaselessKeyword("Position")("type") + _commonSpaceItems +
                    _pos + _spatialTail)
        spaceSubPhrase = (positionInterval
                          | position
                          | atomicGeometry
                          | compoundGeoPhrase).addParseAction(makeTree)

        # spectral subphrase
        spectralSpec = (Suppress(CaselessKeyword("Spectral")) + number)("pos")
        _spectralTail = Optional(spectralUnit) + cooProps
        spectralInterval = (CaselessKeyword("SpectralInterval")("type") +
                            Optional(fillfactor) + Optional(refpos) + _coos +
                            Optional(spectralSpec) + _spectralTail)
        spectral = (CaselessKeyword("Spectral")("type") + Optional(refpos) +
                    _pos + _spectralTail)
        spectralSubPhrase = (spectralInterval
                             | spectral).addParseAction(makeTree)

        # redshift subphrase
        redshiftType = Regex("VELOCITY|REDSHIFT")("redshiftType")
        redshiftSpec = (Suppress(CaselessKeyword("Redshift")) + number)("pos")
        dopplerdef = Regex("OPTICAL|RADIO|RELATIVISTIC")("dopplerdef")
        _redshiftTail = Optional(redshiftUnit) + cooProps
        redshiftInterval = (CaselessKeyword("RedshiftInterval")("type") +
                            Optional(fillfactor) + Optional(refpos) +
                            Optional(redshiftType) + Optional(dopplerdef) +
                            _coos + Optional(redshiftSpec) + _redshiftTail)
        redshift = (CaselessKeyword("Redshift")("type") + Optional(refpos) +
                    Optional(redshiftType) + Optional(dopplerdef) + _pos +
                    _redshiftTail)
        redshiftSubPhrase = (redshiftInterval
                             | redshift).addParseAction(makeTree)

        # system subphrase (extension, see docs)
        # ids match Name from XML spec; we're not doing char refs and similar here
        xmlName = Word(alphas + "_:",
                       alphanums + '.-_:').addParseAction(_stringify)
        systemDefinition = (Suppress(CaselessKeyword("System")) +
                            xmlName("libSystem"))

        # top level
        # Every subphrase is optional, but they must come in this order
        # and the phrase must extend to the end of the input.
        stcsPhrase = (  #noflake: stcsPhrase is returned through locals()
            Optional(timeSubPhrase)("time") + Optional(spaceSubPhrase)
            ("space") + Optional(spectralSubPhrase)("spectral") +
            Optional(redshiftSubPhrase)("redshift") +
            Optional(systemDefinition)) + StringEnd()

        return _makeSymDict(locals(), _exportAll)