def _getSQLScriptGrammar():
    """returns a pyparsing ParserElement splitting an SQL script into its
    individual statements.

    Statements are terminated by a semicolon (or the end of the input).
    SQL (``--``) and C-style (``/* */``) comments are dropped.  Semicolons
    inside single-quoted strings, double-quoted identifiers, and
    dollar-quoted strings do not terminate a statement.  Statements that
    consist of whitespace only are removed from the result.
    """
    def _joinAtoms(s, p, toks):
        # a statement is handed on as its atoms joined by single blanks
        return " ".join(toks)

    def _dropBlankStatements(s, p, toks):
        # empty statements are legal in the input but not reported
        return [stmt for stmt in toks.asList() if str(stmt).strip()]

    with utils.pyparsingWhitechars(" \t"):
        atom = Forward()
        atom.setName("Atom")

        # comments
        lineComment = Literal("--") + SkipTo("\n", include=True)
        blockComment = Literal("/*") + SkipTo("*/", include=True)
        anyComment = lineComment | blockComment
        newline = Literal("\n")

        # quoted material
        singleQuoted = QuotedString(
            quoteChar="'",
            escChar="\\",
            multiline=True,
            unquoteResults=False)
        doubleQuoted = QuotedString(
            quoteChar='"',
            escChar="\\",
            unquoteResults=False)
        dollarQuoted = Regex(r"(?s)\$(\w*)\$.*?\$\1\$")
        dollarQuoted.setName("dollarQuoted")
        # A double-quoted identifier is not really a string literal, but
        # for splitting purposes it behaves like one, so it lives here.
        quotedThing = singleQuoted | dollarQuoted | doubleQuoted
        quotedThing.setName("strLiteral")

        # everything that is neither a terminator nor starts a quote
        plainText = Regex("[^;'\"$]+")
        plainText.setName("other")

        # a single dollar sign that does not open a dollar quote
        loneDollar = Literal("$") + ~Literal("$")
        terminator = (Literal(';') + ZeroOrMore(newline) | StringEnd())

        atom << (Suppress(anyComment) | plainText | quotedThing | loneDollar)

        statement = OneOrMore(atom) + Suppress(terminator)
        statement.setName("statement")
        statement.setParseAction(_joinAtoms)

        script = OneOrMore(statement) + StringEnd()
        script.setName("script")
        script.setParseAction(_dropBlankStatements)

        # set to True to have pyparsing trace the matching of these symbols
        traceGrammar = False
        if traceGrammar:
            for symbol in (atom, anyComment, plainText, quotedThing,
                    statement, terminator, dollarQuoted, loneDollar):
                symbol.setDebug(True)

        return script
def getComplexGrammar(baseLiteral, pmBuilder, errorLiteral=None,
        nodeClass=NumericNode):
    """returns the root element of a grammar parsing numeric vizier-like
    expressions.

    This is used for both dates and floats; use baseLiteral to match the
    operand terminal.  The trouble with dates is that the +/- operator
    has a simple float as the second operand, and that's why you can pass
    in an errorLiteral and a pmBuilder.

    Arguments:

    * baseLiteral -- pyparsing element matching an operand.
    * pmBuilder -- parse action attached to "value +/- error" expressions.
    * errorLiteral -- pyparsing element matching the operand after the
      +/- operator; defaults to baseLiteral.
    * nodeClass -- class handed to the node factories that are attached
      as parse actions.

    The returned element matches a complete expression followed by the
    end of the string.
    """
    if errorLiteral is None:
        errorLiteral = baseLiteral

    with utils.pyparsingWhitechars(" \t"):
        # MatchFirst semantics: ">=" and "<=" must be tried before ">"
        # and "<".
        preOp = Literal("=") | Literal(">=") | Literal(">") | Literal(
            "<=") | Literal("<")
        rangeOp = Literal("..")
        # u"\xb1" is U+00B1 PLUS-MINUS SIGN.  A unicode literal yields the
        # same value the former "\xb1".decode("iso-8859-1") produced,
        # without a runtime decode that only exists on python 2 str.
        pmOp = Literal("+/-") | Literal(u"\xb1")
        orOp = Literal("|")
        andOp = Literal("&")
        notOp = Literal("!")
        commaOp = Literal(",")

        preopExpr = Optional(preOp) + baseLiteral
        rangeExpr = baseLiteral + Suppress(rangeOp) + baseLiteral
        valList = baseLiteral + OneOrMore(Suppress(commaOp) + baseLiteral)
        pmExpr = baseLiteral + Suppress(pmOp) + errorLiteral
        # preopExpr goes last since a bare baseLiteral is a prefix of all
        # the other alternatives.
        simpleExpr = rangeExpr | pmExpr | valList | preopExpr

        expr = Forward()

        notExpr = Optional(notOp) + simpleExpr
        andExpr = notExpr + ZeroOrMore(Suppress(andOp) + notExpr)
        orExpr = andExpr + ZeroOrMore(Suppress(orOp) + expr)
        expr << orExpr
        exprInString = expr + StringEnd()

        rangeExpr.setName("rangeEx")
        rangeOp.setName("rangeOp")
        notExpr.setName("notEx")
        andExpr.setName("andEx")
        andOp.setName("&")
        orExpr.setName("orEx")
        expr.setName("expr")
        simpleExpr.setName("simpleEx")

        preopExpr.addParseAction(_simpleExprFactory(nodeClass))
        rangeExpr.addParseAction(_getNodeFactory("..", nodeClass))
        pmExpr.addParseAction(pmBuilder)
        valList.addParseAction(_getNodeFactory(",", nodeClass))
        notExpr.addParseAction(_makeNotNodeFactory(nodeClass))
        andExpr.addParseAction(_getBinopFactory("AND", nodeClass))
        orExpr.addParseAction(_getBinopFactory("OR", nodeClass))

        return exprInString
def getXMLGrammar(manipulator):
    """returns a dictionary mapping symbol names to pyparsing elements for
    a simple XML grammar.

    manipulator must have _feedElement and _openElement methods; they are
    attached as parse actions to element and tagOpener, respectively.

    The keys of the returned dictionary are the names of the grammar
    symbols defined below (name, opener, ..., element, document), so do
    not rename or add local variables before the symbols are collected.

    leaveWhitespace() is called on every symbol, so whitespace is not
    skipped implicitly; the grammar matches it explicitly through White().
    """
    with utils.pyparsingWhitechars("\r"):
        name = Word(alphas + "_:", alphanums + ".:_-")
        opener = Literal("<")
        closer = Literal(">")
        value = (
            QuotedString(quoteChar="'", multiline=True, unquoteResults=False)
            | QuotedString(quoteChar='"', multiline=True,
                unquoteResults=False))
        attribute = (name + Literal("=") + value)
        tagOpener = (opener
            + name
            + ZeroOrMore(White() + attribute)
            + Optional(White()))

        openingTag = (tagOpener + closer)
        closingTag = (opener + Literal("/") + name + Optional(White())
            + closer)
        emptyTag = (tagOpener + Optional(White()) + Literal("/>"))

        # include is a boolean flag; the string "True" that used to be
        # passed here only worked because it happens to be truthy.
        processingInstruction = (opener
            + Literal("?")
            + SkipTo("?>", include=True))
        comment = (opener
            + Literal("!--")
            + SkipTo("-->", include=True))
        cdataSection = (opener
            + Literal("![CDATA[")
            + SkipTo("]]>", include=True))

        nonTagStuff = CharsNotIn("<", min=1)

        docItem = Forward()
        element = (
            (openingTag + ZeroOrMore(docItem) + closingTag)
            | emptyTag)
        docItem << (element
            | processingInstruction
            | comment
            | cdataSection
            | nonTagStuff)

        document = (ZeroOrMore(Optional(White()) + docItem)
            + Optional(White()) + StringEnd())
        document.parseWithTabs()

        element.addParseAction(manipulator._feedElement)
        tagOpener.addParseAction(manipulator._openElement)
        attribute.addParseAction(lambda s, p, t: [Attribute(t)])
        openingTag.addParseAction(_nodify)
        closingTag.addParseAction(_nodify)
        emptyTag.addParseAction(_nodify)

        # manipulator is not a grammar symbol and must not be exported.
        del manipulator
        # Take one snapshot of the symbols before binding any helper
        # names; this avoids iterating the locals mapping while the loop
        # variable is being bound and keeps helpers out of the result.
        symbols = dict(locals())
        for el in symbols.values():
            # this *really* shouldn't be necessary
            el.leaveWhitespace()

        return symbols
def getUploadGrammar():
    """returns a pyparsing element matching a semicolon-separated list of
    upload specifications.

    Each specification has the form ``<table name>,<uri>``; the parse
    action turns each one into a (name, uri) tuple.  The element only
    matches if the list extends to the end of the string.
    """
    from gavo.imp.pyparsing import (
        Word, ZeroOrMore, Suppress, StringEnd, alphas, alphanums,
        CharsNotIn)

    def _makePair(s, p, t):
        return (t["name"], t["uri"])

    with utils.pyparsingWhitechars(" \t"):
        # Open question: should more table names be allowed?
        identifier = Word(alphas + "_", alphanums + "_")
        # Open question: what should be allowed/forbidden in URIs?
        location = CharsNotIn(" ;,")

        singleUpload = identifier("name") + "," + location("uri")
        furtherUploads = ZeroOrMore(Suppress(";") + singleUpload)
        uploadList = singleUpload + furtherUploads + StringEnd()

        singleUpload.addParseAction(_makePair)
        return uploadList
def _getModelGrammar():
    """returns a pyparsing element matching a comma-separated list of
    meta key assertions.

    An assertion is a meta key (letters and dots), optionally followed by
    a parenthesised modifier that is empty, ``!``, or ``1``.  Each
    assertion is converted through _assertionCodes, looked up with the
    tuple of modifier tokens; the whole list is wrapped into a
    MetaValidator.
    """
    from gavo.imp.pyparsing import (
        Literal, Optional, StringEnd, Suppress, Word, ZeroOrMore, alphas)

    def _makeAssertion(s, p, toks):
        # without a modifier, the lookup key is the empty tuple
        modifierKey = tuple(toks.get("mod", ()))
        keyName = str(toks["key"])
        return _assertionCodes[modifierKey](keyName)

    def _makeValidator(s, p, toks):
        return MetaValidator(toks)

    with utils.pyparsingWhitechars(" \t"):
        keyWord = Word(alphas + ".")
        flagChar = Literal('!') | '1'
        parenthesised = Suppress('(') + Optional(flagChar) + Suppress(')')
        singleAssertion = keyWord("key") + Optional(parenthesised)("mod")
        furtherAssertions = ZeroOrMore(Suppress(',') + singleAssertion)
        wholeModel = singleAssertion + furtherAssertions + StringEnd()

        singleAssertion.addParseAction(_makeAssertion)
        wholeModel.addParseAction(_makeValidator)
        return wholeModel
def getStringGrammar():
    """returns a pyparsing element for vizier-like string expressions.

    The element matches exactly one expression extending to the end of
    the string.  Alternatives are tried in this order: enumerations
    (``=,``, ``!=,``, ``=|``), simple comparisons, pattern matches, and
    finally a naked operand, which is treated as an ``==`` comparison.
    """
    # XXX TODO: should we cut at =| (which is currently parsed as = |)?

    def _enumeration(opener, separator, item):
        # opener item (separator item)*, with the separators dropped
        return Literal(opener) + item + ZeroOrMore(
            Suppress(separator) + item)

    with utils.pyparsingWhitechars(" \t"):
        # simple comparisons: an operator and then the rest of the input
        comparisonOp = (Literal("==") | Literal("!=") | Literal(">=")
            | Literal(">") | Literal("<=") | Literal("<") | Literal("=~")
            | Literal("=,"))
        restOfInput = Regex(r"[^\s].*|")
        # XXX probably a bug in pyparsing: explicit blanks shouldn't be
        # necessary here
        blanks = Word(" \t")
        comparison = comparisonOp + Optional(blanks) + restOfInput

        # enumerations
        commaItem = Regex("[^,]+")
        barItem = Regex("[^|]+")
        inclusionList = _enumeration("=,", ",", commaItem)
        exclusionList = _enumeration("!=,", ",", commaItem)
        barList = _enumeration("=|", "|", barItem)
        enumeration = exclusionList | inclusionList | barList

        # shell-like patterns
        literalRun = CharsNotIn("[*?")
        star = Literal("*")
        questionMark = Literal("?")
        setContent = CharsNotIn("]")
        charSet = Suppress("[") + setContent + Suppress("]")
        pattern = OneOrMore(charSet | star | questionMark | literalRun)

        patternOp = (Literal("~") | Literal("=") | Literal("!~")
            | Literal("!"))
        patternMatch = patternOp + Optional(blanks) + pattern

        # an operand without any operator in front
        bareOperand = Regex("[^=!~|><]") + Optional(restOfInput)

        anyExpr = enumeration | comparison | patternMatch | bareOperand
        wholeInput = anyExpr + StringEnd()

        for symbol, label in (
                (anyExpr, "StringExpr"),
                (enumeration, "EnumExpr"),
                (restOfInput, "Operand"),
                (comparisonOp, "Operator"),
                (bareOperand, "SingleOperand")):
            symbol.setName(label)

        traceGrammar = False
        for symbol in (anyExpr, enumeration, literalRun, restOfInput,
                comparisonOp, bareOperand):
            symbol.setDebug(traceGrammar)

        for symbol in (comparison, patternMatch, enumeration):
            symbol.addParseAction(_makeOpNode)

        equalsNodeFor = _getNodeFactory("==", StringNode)

        def _makeEqualsNode(s, p, toks):
            # glue the tokens back together into a single operand
            return equalsNodeFor(s, p, ["".join(toks)])

        bareOperand.addParseAction(_makeEqualsNode)

        for symbol in (star, questionMark):
            symbol.addParseAction(_makeOpNode)
        setContent.addParseAction(_getNodeFactory("[", StringNode))

        return wholeInput
def _getSTCSGrammar(numberLiteral, timeLiteral, _exportAll=False,
        _addGeoReferences=False):
    """returns a dictionary of symbols for a grammar parsing STC-S into
    a concrete syntax tree.

    numberLiteral and timeLiteral are pyparsing symbols for numbers and
    datetimes, respectively.

    _exportAll is handed through to _makeSymDict together with the local
    namespace of this function.  Since the symbols are collected via
    locals(), every local name defined below is part of what
    _makeSymDict gets to see; be careful when renaming or adding names.

    If _addGeoReferences is true, a reference to a geometry column is
    accepted wherever coordinates or a position are expected; such
    references become common.GeometryColRef instances.
    NOTE(review): the example previously given here was a double-quoted
    reference (Circle "center" 20.), but the pattern used below matches
    an identifier in square brackets ([center]) -- confirm which syntax
    is intended.
    """
    with utils.pyparsingWhitechars("\n\t\r "):
        number = numberLiteral
        # remove the parameter name from the local namespace, which is
        # later passed to _makeSymDict
        del numberLiteral

        # units
        # The "-" operator is used after the unit keyword throughout this
        # section (presumably pyparsing's error-stop And; see its docs).
        _unitOpener = Suppress(CaselessKeyword("unit"))
        _spaceUnitWord = Regex(_reFromKeys(spatialUnits))
        _timeUnitWord = Regex(_reFromKeys(temporalUnits))
        spaceUnit = _unitOpener - OneOrMore(_spaceUnitWord).addParseAction(
            _stringifyBlank)("unit")
        timeUnit = _unitOpener - _timeUnitWord("unit")
        spectralUnit = _unitOpener - Regex(_reFromKeys(spectralUnits))("unit")
        # redshift units are either <space unit>/<time unit> or nil
        redshiftUnit = _unitOpener - (
            (_spaceUnitWord + "/" + _timeUnitWord).addParseAction(_stringify)
            | CaselessKeyword("nil"))("unit")
        # velocity units are one or more <space unit>/<time unit> items
        velocityUnit = _unitOpener - (OneOrMore(
            (_spaceUnitWord + "/" + _timeUnitWord).addParseAction(
                _stringify)).addParseAction(_stringifyBlank))("unit")

        # basic productions common to most STC-S subphrases
        # B or J followed by a year, optionally with a fractional part
        astroYear = Regex("[BJ][0-9]+([.][0-9]*)?")
        fillfactor = (Suppress(CaselessKeyword("fillfactor"))
            + number("fillfactor"))
        # frames that do not take an equinox
        noEqFrame = (CaselessKeyword("J2000")
            | CaselessKeyword("B1950")
            | CaselessKeyword("ICRS")
            | CaselessKeyword("GALACTIC")
            | CaselessKeyword("GALACTIC_I")
            | CaselessKeyword("GALACTIC_II")
            | CaselessKeyword("SUPER_GALACTIC")
            | CaselessKeyword("GEO_C")
            | CaselessKeyword("GEO_D")
            | CaselessKeyword("HPR")
            | CaselessKeyword("HGS")
            | CaselessKeyword("HGC")
            | CaselessKeyword("HPC")
            | CaselessKeyword("UNKNOWNFrame"))("frame")
        # frames that may be followed by an equinox
        eqFrameName = (CaselessKeyword("FK5")
            | CaselessKeyword("FK4")
            | CaselessKeyword("ECLIPTIC"))("frame")
        eqFrame = eqFrameName + Optional(astroYear("equinox"))
        frame = eqFrame | noEqFrame
        plEphemeris = CaselessKeyword("JPL-DE200") | CaselessKeyword(
            "JPL-DE405")
        refpos = ((Regex(_reFromKeys(common.stcRefPositions)))("refpos")
            + Optional(plEphemeris("plEphemeris")))
        flavor = (Regex(_reFromKeys(stcsFlavors)))("flavor")

        # properties of coordinates
        error = Suppress(CaselessKeyword("Error")) + OneOrMore(number)
        resolution = Suppress(
            CaselessKeyword("Resolution")) + OneOrMore(number)
        size = Suppress(CaselessKeyword("Size")) + OneOrMore(number)
        pixSize = Suppress(CaselessKeyword("PixSize")) + OneOrMore(number)
        # all four are optional but must come in this order
        cooProps = (Optional(error("error"))
            + Optional(resolution("resolution"))
            + Optional(size("size"))
            + Optional(pixSize("pixSize")))

        # properties of most spatial specs
        _coos = ZeroOrMore(number)("coos")
        _pos = Optional(ZeroOrMore(number)("pos"))
        if _addGeoReferences:
            # include references to vectors, for getColrefSymbols
            # The parse action strips the enclosing brackets before
            # building the GeometryColRef.
            complexColRef = Regex(
                '[[][A-Za-z_][A-Za-z_0-9]*[]]').addParseAction(
                lambda s, p, toks: common.GeometryColRef(toks[0][1:-1]))
            _coos = complexColRef("coos") | _coos
            _pos = complexColRef("pos") | _pos
        positionSpec = Suppress(CaselessKeyword("Position")) + _pos
        epochSpec = Suppress(CaselessKeyword("Epoch")) - astroYear
        _spatialProps = Optional(spaceUnit) + cooProps
        velocitySpec = (CaselessKeyword("Velocity")("type")
            + OneOrMore(number)("pos"))
        # note that every part of a velocity interval is optional
        velocityInterval = (Optional(
            CaselessKeyword("VelocityInterval")("type")
            + Optional(fillfactor) + _coos)
            + Optional(velocitySpec)
            + Optional(velocityUnit)
            + cooProps).addParseAction(makeTree)
        _spatialTail = (_spatialProps
            + Optional(velocityInterval)("velocity"))
        _regionTail = Optional(positionSpec) + _spatialTail
        _commonSpaceItems = (
            frame
            + Optional(refpos)
            + Optional(flavor)
            + Optional(epochSpec("epoch").addParseAction(_stringify)))
        _commonRegionItems = Optional(fillfactor) + _commonSpaceItems

        # times and time intervals
        timescale = (Regex("|".join(common.stcTimeScales)))("timescale")
        timephrase = Suppress(CaselessKeyword("Time")) + timeLiteral
        _commonTimeItems = Optional(timeUnit) + cooProps
        _intervalOpener = (Optional(fillfactor)
            + Optional(timescale("timescale"))
            + Optional(refpos))
        _intervalCloser = Optional(timephrase("pos")) + _commonTimeItems

        timeInterval = (CaselessKeyword("TimeInterval")("type")
            + _intervalOpener
            + ZeroOrMore(timeLiteral)("coos")
            + _intervalCloser)
        # setResultsName is called with True as second argument for the
        # single time literals here and in stopTime and time.
        startTime = (CaselessKeyword("StartTime")("type")
            + _intervalOpener
            + timeLiteral.setResultsName("coos", True)
            + _intervalCloser)
        stopTime = (CaselessKeyword("StopTime")("type")
            + _intervalOpener
            + timeLiteral.setResultsName("coos", True)
            + _intervalCloser)
        time = (CaselessKeyword("Time")("type")
            + Optional(timescale("timescale"))
            + Optional(refpos)
            + Optional(timeLiteral.setResultsName("pos", True))
            + _commonTimeItems)
        timeSubPhrase = (timeInterval
            | startTime
            | stopTime
            | time).addParseAction(makeTree)

        # atomic "geometries"; I do not bother to specify their actual
        # arguments since, without knowing the frame, they may be basically
        # anything.  Also, I want to allow geometry column references.
        _atomicGeometryKey = (
            CaselessKeyword("AllSky").setName("sub-geometry")
            | CaselessKeyword("Circle")
            | CaselessKeyword("Ellipse")
            | CaselessKeyword("Box")
            | CaselessKeyword("Polygon")
            | CaselessKeyword("Convex")
            | CaselessKeyword("PositionInterval"))
        atomicGeometry = (_atomicGeometryKey("type")
            + _commonRegionItems
            + _coos
            + _regionTail)

        # compound "geometries"
        # Operands are either atomic geometries or, recursively, further
        # compound expressions; each operand is turned into a plain dict.
        _compoundGeoExpression = Forward()
        _compoundGeoOperand = (
            (_atomicGeometryKey("subtype") + _coos)
            | _compoundGeoExpression).addParseAction(lambda s, p, t: dict(t))

        # Not takes exactly one operand
        _compoundGeoOperatorUnary = CaselessKeyword("Not")
        _compoundGeoOperandsUnary = (Suppress('(')
            + _compoundGeoOperand + Suppress(')'))
        _compoundGeoExprUnary = (_compoundGeoOperatorUnary("subtype")
            + _compoundGeoOperandsUnary("children"))

        # Difference takes exactly two operands
        _compoundGeoOperatorBinary = CaselessKeyword("Difference")
        _compoundGeoOperandsBinary = (Suppress('(')
            + _compoundGeoOperand + _compoundGeoOperand + Suppress(')'))
        _compoundGeoExprBinary = (_compoundGeoOperatorBinary("subtype")
            + _compoundGeoOperandsBinary("children"))

        # Union and Intersection take two or more operands
        _compoundGeoOperatorNary = (CaselessKeyword("Union")
            | CaselessKeyword("Intersection"))
        _compoundGeoOperandsNary = (Suppress('(')
            + _compoundGeoOperand
            + _compoundGeoOperand
            + ZeroOrMore(_compoundGeoOperand)
            + Suppress(')'))
        _compoundGeoExprNary = (_compoundGeoOperatorNary("subtype")
            + _compoundGeoOperandsNary("children"))

        _compoundGeoExpression << (_compoundGeoExprUnary
            | _compoundGeoExprBinary
            | _compoundGeoExprNary)
        # At the top level of a space subphrase, the operator is followed
        # by the common region items and the operands by the region tail.
        compoundGeoPhrase = (
            _compoundGeoOperatorUnary("type")
            + _commonRegionItems
            + _compoundGeoOperandsUnary("children")
            + _regionTail
            | _compoundGeoOperatorBinary("type")
            + _commonRegionItems
            + _compoundGeoOperandsBinary("children")
            + _regionTail
            | _compoundGeoOperatorNary("type")
            + _commonRegionItems
            - _compoundGeoOperandsNary("children")
            + _regionTail)

        # space subphrase
        positionInterval = (CaselessKeyword("PositionInterval")("type")
            + _commonRegionItems
            + _coos
            + _regionTail)
        position = (CaselessKeyword("Position")("type")
            + _commonSpaceItems
            + _pos
            + _spatialTail)
        spaceSubPhrase = (positionInterval
            | position
            | atomicGeometry
            | compoundGeoPhrase).addParseAction(makeTree)

        # spectral subphrase
        spectralSpec = (Suppress(CaselessKeyword("Spectral"))
            + number)("pos")
        _spectralTail = Optional(spectralUnit) + cooProps
        spectralInterval = (CaselessKeyword("SpectralInterval")("type")
            + Optional(fillfactor)
            + Optional(refpos)
            + _coos
            + Optional(spectralSpec)
            + _spectralTail)
        spectral = (CaselessKeyword("Spectral")("type")
            + Optional(refpos)
            + _pos
            + _spectralTail)
        spectralSubPhrase = (spectralInterval
            | spectral).addParseAction(makeTree)

        # redshift subphrase
        # redshiftType and dopplerdef are plain regular expressions, not
        # CaselessKeywords like most other keywords in this grammar.
        redshiftType = Regex("VELOCITY|REDSHIFT")("redshiftType")
        redshiftSpec = (Suppress(CaselessKeyword("Redshift"))
            + number)("pos")
        dopplerdef = Regex("OPTICAL|RADIO|RELATIVISTIC")("dopplerdef")
        _redshiftTail = Optional(redshiftUnit) + cooProps
        redshiftInterval = (CaselessKeyword("RedshiftInterval")("type")
            + Optional(fillfactor)
            + Optional(refpos)
            + Optional(redshiftType)
            + Optional(dopplerdef)
            + _coos
            + Optional(redshiftSpec)
            + _redshiftTail)
        redshift = (CaselessKeyword("Redshift")("type")
            + Optional(refpos)
            + Optional(redshiftType)
            + Optional(dopplerdef)
            + _pos
            + _redshiftTail)
        redshiftSubPhrase = (redshiftInterval
            | redshift).addParseAction(makeTree)

        # system subphrase (extension, see docs)
        # ids match Name from XML spec; we're not doing char refs and
        # similar here
        xmlName = Word(alphas + "_:",
            alphanums + '.-_:').addParseAction(_stringify)
        systemDefinition = (Suppress(CaselessKeyword("System"))
            + xmlName("libSystem"))

        # top level
        # All subphrases are optional, but they must come in the order
        # time, space, spectral, redshift, system.
        stcsPhrase = ( #noflake: stcsPhrase is returned through locals()
            Optional(timeSubPhrase)("time")
            + Optional(spaceSubPhrase)("space")
            + Optional(spectralSubPhrase)("spectral")
            + Optional(redshiftSubPhrase)("redshift")
            + Optional(systemDefinition)) + StringEnd()

        return _makeSymDict(locals(), _exportAll)