def getComplexGrammar(baseLiteral, pmBuilder, errorLiteral=None,
        nodeClass=NumericNode):
    """returns the root element of a grammar parsing numeric vizier-like
    expressions.

    The same grammar serves dates and floats; baseLiteral is the terminal
    matching a single operand.

    For dates, the second operand of the +/- operator is a simple float
    rather than a date.  That is why you can pass a separate errorLiteral
    (it defaults to baseLiteral) and why the parse action for +/-
    expressions, pmBuilder, is supplied by the caller.

    nodeClass is handed to the factories producing the remaining parse
    actions.

    The returned element requires the expression to extend to the end of
    the parsed string.
    """
    errorLiteral = baseLiteral if errorLiteral is None else errorLiteral

    with utils.pyparsingWhitechars(" \t"):
        # operator terminals
        comparison = Literal("=") | Literal(">=") | Literal(">") | Literal(
            "<=") | Literal("<")
        dots = Literal("..")
        plusMinus = Literal("+/-") | Literal("\xb1".decode("iso-8859-1"))
        bar = Literal("|")
        ampersand = Literal("&")
        bang = Literal("!")
        comma = Literal(",")

        # the four kinds of simple constraints
        compared = Optional(comparison) + baseLiteral
        between = baseLiteral + Suppress(dots) + baseLiteral
        enumerated = baseLiteral + OneOrMore(Suppress(comma) + baseLiteral)
        withError = baseLiteral + Suppress(plusMinus) + errorLiteral
        simple = between | withError | enumerated | compared

        # boolean combinations; the right operands of | recurse into root
        root = Forward()
        negated = Optional(bang) + simple
        conjunction = negated + ZeroOrMore(Suppress(ampersand) + negated)
        disjunction = conjunction + ZeroOrMore(Suppress(bar) + root)
        root << disjunction
        anchored = root + StringEnd()

        # names as they show up in pyparsing's diagnostics
        for element, label in [
                (between, "rangeEx"),
                (dots, "rangeOp"),
                (negated, "notEx"),
                (conjunction, "andEx"),
                (ampersand, "&"),
                (disjunction, "orEx"),
                (root, "expr"),
                (simple, "simpleEx")]:
            element.setName(label)

        # tree building
        for element, action in [
                (compared, _simpleExprFactory(nodeClass)),
                (between, _getNodeFactory("..", nodeClass)),
                (withError, pmBuilder),
                (enumerated, _getNodeFactory(",", nodeClass)),
                (negated, _makeNotNodeFactory(nodeClass)),
                (conjunction, _getBinopFactory("AND", nodeClass)),
                (disjunction, _getBinopFactory("OR", nodeClass))]:
            element.addParseAction(action)

        return anchored
def getXMLGrammar(manipulator):
    """returns a dictionary of pyparsing symbols for a simple,
    whitespace-preserving XML grammar.

    manipulator must have _feedElement and _openElement methods; they are
    installed as parse actions on complete elements and on tag openers
    (the "<name attr=..." part of a tag), respectively.  Attributes are
    wrapped into Attribute instances, and opening, closing, and empty tags
    are run through _nodify.

    The return value is this function's local namespace, so its keys are
    the symbol names used below (document, element, docItem, ...).
    Because of that, and because leaveWhitespace is called on every local,
    all locals in here must be pyparsing elements.
    """
    with utils.pyparsingWhitechars("\r"):
        name = Word(alphas + "_:", alphanums + ".:_-")
        opener = Literal("<")
        closer = Literal(">")
        # quotes are kept in the results (unquoteResults=False)
        value = (
            QuotedString(quoteChar="'", multiline=True, unquoteResults=False)
            | QuotedString(quoteChar='"', multiline=True,
                unquoteResults=False))
        attribute = (name + Literal("=") + value)
        tagOpener = (opener + name
            + ZeroOrMore(White() + attribute)
            + Optional(White()))

        openingTag = (tagOpener + closer)
        closingTag = (opener + Literal("/") + name + Optional(White())
            + closer)
        emptyTag = (tagOpener + Optional(White()) + Literal("/>"))

        # include used to be passed as the string "True", which only
        # worked because non-empty strings are truthy.
        processingInstruction = (opener + Literal("?")
            + SkipTo("?>", include=True))
        comment = (opener + Literal("!--")
            + SkipTo("-->", include=True))
        cdataSection = (opener + Literal("![CDATA[")
            + SkipTo("]]>", include=True))

        nonTagStuff = CharsNotIn("<", min=1)

        docItem = Forward()
        element = ((openingTag + ZeroOrMore(docItem) + closingTag)
            | emptyTag)
        docItem << (element
            | processingInstruction
            | comment
            | cdataSection
            | nonTagStuff)

        document = (ZeroOrMore(Optional(White()) + docItem)
            + Optional(White()) + StringEnd())
        document.parseWithTabs()

        element.addParseAction(manipulator._feedElement)
        tagOpener.addParseAction(manipulator._openElement)
        attribute.addParseAction(lambda s, p, t: [Attribute(t)])
        openingTag.addParseAction(_nodify)
        closingTag.addParseAction(_nodify)
        emptyTag.addParseAction(_nodify)

        # manipulator is not a grammar symbol; take it out of the namespace
        # before iterating over and returning locals().
        del manipulator
        for el in locals().itervalues():
            # this *really* shouldn't be necessary
            el.leaveWhitespace()
        # the loop variable must not leak into the returned dictionary
        del el

    return locals()
def _getMacroGrammar(self, debug=False):
    """returns a pyparsing element matching a single backslash construct.

    The constructs recognised are:

    * an escaped backslash (two backslashes), yielding one backslash,
    * a backslash followed by a newline, yielding a blank,
    * backslash-plus ("glue"), yielding the empty string,
    * a macro call: a backslash, a name, and zero or more arguments in
      curly braces; the call is handed to self._execMacro.

    Arguments may contain further macro calls as well as escaped braces;
    the pieces of an argument are joined into one string.

    debug is accepted but not used in here.
    """
    with utils.pyparsingWhitechars(" \t"):
        call = Forward()

        # within arguments, \{ and \} stand for literal braces
        escapedBrace = (
            Literal("\\{").addParseAction(lambda *args: "{")
            | Literal("\\}").addParseAction(lambda *args: "}"))
        plainText = Regex(r"[^}\\]+")

        braced = (Suppress("{")
            + ZeroOrMore(call | escapedBrace | plainText)
            + Suppress("}"))
        braced.addParseAction(lambda s, pos, toks: "".join(toks))

        # neither the argument list nor the name skips leading whitespace
        argList = ZeroOrMore(braced)
        argList.setWhitespaceChars("")
        identifier = Regex("[A-Za-z_][A-Za-z_0-9]+")
        identifier.setWhitespaceChars("")

        call << (Suppress("\\") + identifier + argList)
        call.addParseAction(self._execMacro)

        backslash = Literal("\\\\").addParseAction(lambda *args: "\\")
        joinedLine = Literal("\\\n").addParseAction(lambda *args: " ")
        glueMark = Literal("\\+").addParseAction(lambda *args: "")

        return backslash | joinedLine | glueMark | call
def getSimpleSTCSParser():
    """returns a function parsing simple STC-S geometry expressions.

    The grammar accepts

    * simple statements: one of position, circle, box, or polygon,
      followed by an optional frame, refpos, and flavor and a run of
      floats; these are turned into objects by _makePgSphereInstance,
    * UNION or INTERSECTION of two or more regions, and NOT of one
      region; these become GeomExpr instances.

    The returned function takes one argument s and

    * returns None if s is None or blank,
    * returns s unchanged if it is a pgsphere.PgSAdapter,
    * otherwise returns the first parse result, with an empty or
      "unknownframe" cooSys replaced by "UNKNOWN".

    It raises common.STCSParseError when pyparsing rejects s.
    """
    from gavo.imp.pyparsing import (
        Regex, CaselessKeyword, OneOrMore, Forward, Suppress,
        Optional, ParseException, ParseSyntaxException)

    with utils.pyparsingWhitechars(" \t\n\r"):
        frameRE = _makeRE(TAP_SYSTEMS)
        refposRE = _makeRE(TAP_REFPOS)
        flavorRE = _makeRE(TAP_FLAVORS)
        systemRE = (r"(?i)\s*"
            r"(?P<frame>%s)?\s*"
            r"(?P<refpos>%s)?\s*"
            r"(?P<flavor>%s)?\s*")%(
                frameRE, refposRE, flavorRE)
        coordsRE = r"(?P<coords>(%s\s*)+)"%utils.floatRE

        # raw string for the \s; the pattern text itself is unchanged
        simpleStatement = Regex(r"(?i)\s*"
            "(?P<shape>position|circle|box|polygon)"
            +systemRE
            +coordsRE)
        simpleStatement.setName("STC-S geometry")
        simpleStatement.addParseAction(
            lambda s,p,t: _makePgSphereInstance(t))
        system = Regex(systemRE)
        system.setName("STC-S system spec")
        region = Forward()

        # t[0] is the NOT keyword, t[1] the negated region
        notExpr = CaselessKeyword("NOT") + Suppress('(') + region + Suppress(')')
        notExpr.addParseAction(
            lambda s,p,t: GeomExpr("UNKNOWN", "NOT", (t[1],)))

        # set operations need at least two operand regions
        opExpr = (
            (CaselessKeyword("UNION") | CaselessKeyword("INTERSECTION"))("op")
            + Optional(Regex(frameRE))("frame")
            + Optional(Regex(refposRE))
            + Optional(Regex(flavorRE))
            + Suppress("(")
            + region
            + OneOrMore(region)
            + Suppress(")"))
        opExpr.addParseAction(
            lambda s,p,t: GeomExpr(str(t["frame"]), t[0].upper(), t[2:]))

        region << (simpleStatement | opExpr | notExpr)

    def parse(s):
        """parses s with the grammar built in the enclosing function; see
        there for return values and exceptions.
        """
        # NOTE(review): the blank check runs before the adapter check, so
        # adapters presumably need to provide strip() -- confirm.
        if s is None or not s.strip():  # special service: Null values
            return None
        if isinstance(s, pgsphere.PgSAdapter):
            return s

        try:
            res = utils.pyparseString(region, s, parseAll=True)[0]
            if not res.cooSys or res.cooSys.lower()=='unknownframe':  # Sigh.
                res.cooSys = "UNKNOWN"
            return res
        except (ParseException, ParseSyntaxException) as msg:
            raise common.STCSParseError("Invalid STCS (%s)"%str(msg))

    # Without this, the parser built above was discarded and callers
    # received None.
    return parse
def _getSQLScriptGrammar():
    """returns a pyparsing ParserElement splitting SQL scripts into
    individual commands.

    Commands are terminated by semicolons (or the end of the input).
    Semicolons within single-quoted strings, double-quoted identifiers,
    and dollar-quoted strings do not terminate a command.  SQL (--) and
    C-style comments matched at the start of an atom are dropped.

    The pieces of each command are joined with blanks, and commands that
    come out empty or whitespace-only are removed from the result.
    """
    with utils.pyparsingWhitechars(" \t"):
        piece = Forward()
        piece.setName("Atom")

        # comments
        dashComment = Literal("--") + SkipTo("\n", include=True)
        blockComment = Literal("/*") + SkipTo("*/", include=True)
        anyComment = dashComment | blockComment
        newline = Literal("\n")

        # quoted material; quotes are kept in the output
        singleQuoted = QuotedString(
            quoteChar="'",
            escChar="\\",
            multiline=True,
            unquoteResults=False)
        doubleQuoted = QuotedString(
            quoteChar='"',
            escChar="\\",
            unquoteResults=False)
        dollarString = Regex(r"(?s)\$(\w*)\$.*?\$\1\$")
        dollarString.setName("dollarQuoted")
        # a double-quoted identifier is not really a string literal, but
        # it is treated as one here for simplicity.
        quoted = singleQuoted | dollarString | doubleQuoted
        quoted.setName("strLiteral")

        # anything that cannot start a quoted string or end a statement
        plain = Regex("[^;'\"$]+")
        plain.setName("other")

        # a dollar sign that is not followed by another dollar sign
        loneDollar = Literal("$") + ~Literal("$")
        terminator = ((Literal(';') + ZeroOrMore(newline)) | StringEnd())

        piece << (Suppress(anyComment) | plain | quoted | loneDollar)

        command = OneOrMore(piece) + Suppress(terminator)
        command.setName("statement")
        command.setParseAction(lambda s, p, toks: " ".join(toks))

        wholeScript = OneOrMore(command) + StringEnd()
        wholeScript.setName("script")
        wholeScript.setParseAction(
            lambda s, p, toks: [t for t in toks.asList() if str(t).strip()])

        # development aid: change the condition to trace the grammar
        if False:
            for traced in (piece, anyComment, plain, quoted, command,
                    terminator, dollarString, loneDollar):
                traced.setDebug(True)

        return wholeScript
def impl(cls):
    """builds the grammar for a small object notation, stores its symbols
    in cls.symbols, and returns the root symbol obj.

    In the notation defined below, a value is one of

    * a reference: @ followed by an external identifier,
    * an object: a type annotation in parentheses followed by
      attribute definitions (identifier: value) in curly braces,
    * a collection: an optional type annotation followed by values or
      bare object bodies in square brackets,
    * a plain or double-quoted literal.

    Parse actions are taken from the module's globals: a global called
    _pa_<symbol> becomes the parse action of the local <symbol>.  A
    _pa_ global without a matching local raises a KeyError.
    """
    from gavo.imp.pyparsing import (Word, Literal, alphas, alphanums,
        QuotedString, Forward, ZeroOrMore, Group, Optional)

    with utils.pyparsingWhitechars("\t\n\r "):
        # identifiers and literals
        qualifiedIdentifier = Word(alphas + "_:", alphanums + "-._:")
        plainIdentifier = Word(alphas + "_", alphanums + "-._")
        externalIdentifier = Word(alphas + "_", alphanums + "._/#-")
        plainLiteral = Word(alphanums + "_-.")
        # within quoted literals, a doubled quote stands for a quote
        quotedLiteral = QuotedString(quoteChar='"', escQuote='""')
        reference = Literal('@') + externalIdentifier

        # values; complexImmediate is recursive and filled in below
        complexImmediate = Forward()
        simpleImmediate = plainLiteral | quotedLiteral
        value = reference | complexImmediate | simpleImmediate

        # objects and collections
        attributeDef = (plainIdentifier + Literal(":") + value)
        typeAnnotation = (Literal('(') + qualifiedIdentifier + Literal(')'))
        objectBody = (Literal('{') + Group(ZeroOrMore(attributeDef))
            + Literal('}'))
        obj = typeAnnotation + objectBody

        sequenceBody = (Literal('[')
            + Group(ZeroOrMore(value | objectBody))
            + Literal(']'))
        collection = Optional(typeAnnotation) + sequenceBody

        complexImmediate << (obj | collection)

    # n[4:] strips the _pa_ prefix to obtain the name of the local symbol
    for n, func in globals().iteritems():
        if n.startswith("_pa_"):
            locals()[n[4:]].setParseAction(func)

    # locals() holds everything bound in this function, i.e., next to
    # the grammar symbols also cls, the imported names, and n and func.
    cls.symbols = locals()
    return obj
def _getSTCSGrammar(numberLiteral, timeLiteral, _exportAll=False,
        _addGeoReferences=False):
    """returns a dictionary of symbols for a grammar parsing STC-S into
    a concrete syntax tree.

    numberLiteral and timeLiteral are pyparsing symbols for numbers and
    datetimes, respectively.

    _addGeoReferences lets you write references to vectors in place of
    coordinates; a reference is an identifier in square brackets
    (like Circle [center] 20.) and becomes a common.GeometryColRef.

    The result is built by _makeSymDict from this function's locals and
    _exportAll, so the names assigned in here are part of the interface;
    do not rename them.
    """
    with utils.pyparsingWhitechars("\n\t\r "):
        # export the number symbol under the name number only
        number = numberLiteral
        del numberLiteral

        # units
        # (the - operator is pyparsing's And with an error stop)
        _unitOpener = Suppress(CaselessKeyword("unit"))
        _spaceUnitWord = Regex(_reFromKeys(spatialUnits))
        _timeUnitWord = Regex(_reFromKeys(temporalUnits))
        spaceUnit = _unitOpener - OneOrMore(_spaceUnitWord).addParseAction(
            _stringifyBlank)("unit")
        timeUnit = _unitOpener - _timeUnitWord("unit")
        spectralUnit = _unitOpener - Regex(_reFromKeys(spectralUnits))("unit")
        redshiftUnit = _unitOpener - (
            (_spaceUnitWord + "/" + _timeUnitWord).addParseAction(_stringify)
            | CaselessKeyword("nil"))("unit")
        velocityUnit = _unitOpener - (OneOrMore(
            (_spaceUnitWord + "/" + _timeUnitWord).addParseAction(
                _stringify)).addParseAction(_stringifyBlank))("unit")

        # basic productions common to most STC-S subphrases
        astroYear = Regex("[BJ][0-9]+([.][0-9]*)?")
        fillfactor = (Suppress(CaselessKeyword("fillfactor"))
            + number("fillfactor"))
        # frames that do not take an equinox
        noEqFrame = (CaselessKeyword("J2000")
            | CaselessKeyword("B1950")
            | CaselessKeyword("ICRS")
            | CaselessKeyword("GALACTIC")
            | CaselessKeyword("GALACTIC_I")
            | CaselessKeyword("GALACTIC_II")
            | CaselessKeyword("SUPER_GALACTIC")
            | CaselessKeyword("GEO_C")
            | CaselessKeyword("GEO_D")
            | CaselessKeyword("HPR")
            | CaselessKeyword("HGS")
            | CaselessKeyword("HGC")
            | CaselessKeyword("HPC")
            | CaselessKeyword("UNKNOWNFrame"))("frame")
        # frames that may be followed by an equinox
        eqFrameName = (CaselessKeyword("FK5")
            | CaselessKeyword("FK4")
            | CaselessKeyword("ECLIPTIC"))("frame")
        eqFrame = eqFrameName + Optional(astroYear("equinox"))
        frame = eqFrame | noEqFrame
        plEphemeris = CaselessKeyword("JPL-DE200") | CaselessKeyword(
            "JPL-DE405")
        refpos = ((Regex(_reFromKeys(common.stcRefPositions)))("refpos")
            + Optional(plEphemeris("plEphemeris")))
        flavor = (Regex(_reFromKeys(stcsFlavors)))("flavor")

        # properties of coordinates
        error = Suppress(CaselessKeyword("Error")) + OneOrMore(number)
        resolution = Suppress(
            CaselessKeyword("Resolution")) + OneOrMore(number)
        size = Suppress(CaselessKeyword("Size")) + OneOrMore(number)
        pixSize = Suppress(CaselessKeyword("PixSize")) + OneOrMore(number)
        cooProps = (Optional(error("error"))
            + Optional(resolution("resolution"))
            + Optional(size("size"))
            + Optional(pixSize("pixSize")))

        # properties of most spatial specs
        _coos = ZeroOrMore(number)("coos")
        _pos = Optional(ZeroOrMore(number)("pos"))
        if _addGeoReferences:
            # include references to vectors, for getColrefSymbols
            # (the parse action strips the enclosing brackets)
            complexColRef = Regex(
                '[[][A-Za-z_][A-Za-z_0-9]*[]]').addParseAction(
                lambda s, p, toks: common.GeometryColRef(toks[0][1:-1]))
            _coos = complexColRef("coos") | _coos
            _pos = complexColRef("pos") | _pos
        positionSpec = Suppress(CaselessKeyword("Position")) + _pos
        epochSpec = Suppress(CaselessKeyword("Epoch")) - astroYear
        _spatialProps = Optional(spaceUnit) + cooProps
        velocitySpec = (CaselessKeyword("Velocity")("type")
            + OneOrMore(number)("pos"))
        velocityInterval = (Optional(
                CaselessKeyword("VelocityInterval")("type")
                + Optional(fillfactor)
                + _coos)
            + Optional(velocitySpec)
            + Optional(velocityUnit)
            + cooProps).addParseAction(makeTree)
        _spatialTail = (_spatialProps
            + Optional(velocityInterval)("velocity"))
        _regionTail = Optional(positionSpec) + _spatialTail
        _commonSpaceItems = (
            frame
            + Optional(refpos)
            + Optional(flavor)
            + Optional(epochSpec("epoch").addParseAction(_stringify)))
        _commonRegionItems = Optional(fillfactor) + _commonSpaceItems

        # times and time intervals
        timescale = (Regex("|".join(common.stcTimeScales)))("timescale")
        timephrase = Suppress(CaselessKeyword("Time")) + timeLiteral
        _commonTimeItems = Optional(timeUnit) + cooProps
        _intervalOpener = (Optional(fillfactor)
            + Optional(timescale("timescale"))
            + Optional(refpos))
        _intervalCloser = Optional(timephrase("pos")) + _commonTimeItems

        timeInterval = (CaselessKeyword("TimeInterval")("type")
            + _intervalOpener
            + ZeroOrMore(timeLiteral)("coos")
            + _intervalCloser)
        # the second argument to setResultsName is listAllMatches
        startTime = (CaselessKeyword("StartTime")("type")
            + _intervalOpener
            + timeLiteral.setResultsName("coos", True)
            + _intervalCloser)
        stopTime = (CaselessKeyword("StopTime")("type")
            + _intervalOpener
            + timeLiteral.setResultsName("coos", True)
            + _intervalCloser)
        time = (CaselessKeyword("Time")("type")
            + Optional(timescale("timescale"))
            + Optional(refpos)
            + Optional(timeLiteral.setResultsName("pos", True))
            + _commonTimeItems)
        timeSubPhrase = (timeInterval
            | startTime
            | stopTime
            | time).addParseAction(makeTree)

        # atomic "geometries"; I do not bother to specify their actual
        # arguments since, without knowing the frame, they may be basically
        # anything.  Also, I want to allow geometry column references.
        _atomicGeometryKey = (
            CaselessKeyword("AllSky").setName("sub-geometry")
            | CaselessKeyword("Circle")
            | CaselessKeyword("Ellipse")
            | CaselessKeyword("Box")
            | CaselessKeyword("Polygon")
            | CaselessKeyword("Convex")
            | CaselessKeyword("PositionInterval"))
        atomicGeometry = (_atomicGeometryKey("type")
            + _commonRegionItems
            + _coos
            + _regionTail)

        # compound "geometries"
        # operands are atomic geometries or, recursively, compound
        # expressions; each operand is turned into a plain dict.
        _compoundGeoExpression = Forward()
        _compoundGeoOperand = (
            (_atomicGeometryKey("subtype") + _coos)
            | _compoundGeoExpression).addParseAction(lambda s, p, t: dict(t))

        # Not takes exactly one operand
        _compoundGeoOperatorUnary = CaselessKeyword("Not")
        _compoundGeoOperandsUnary = (Suppress('(')
            + _compoundGeoOperand + Suppress(')'))
        _compoundGeoExprUnary = (_compoundGeoOperatorUnary("subtype")
            + _compoundGeoOperandsUnary("children"))

        # Difference takes exactly two operands
        _compoundGeoOperatorBinary = CaselessKeyword("Difference")
        _compoundGeoOperandsBinary = (Suppress('(')
            + _compoundGeoOperand + _compoundGeoOperand + Suppress(')'))
        _compoundGeoExprBinary = (_compoundGeoOperatorBinary("subtype")
            + _compoundGeoOperandsBinary("children"))

        # Union and Intersection take two or more operands
        _compoundGeoOperatorNary = (CaselessKeyword("Union")
            | CaselessKeyword("Intersection"))
        _compoundGeoOperandsNary = (Suppress('(')
            + _compoundGeoOperand + _compoundGeoOperand
            + ZeroOrMore(_compoundGeoOperand) + Suppress(')'))
        _compoundGeoExprNary = (_compoundGeoOperatorNary("subtype")
            + _compoundGeoOperandsNary("children"))

        _compoundGeoExpression << (_compoundGeoExprUnary
            | _compoundGeoExprBinary
            | _compoundGeoExprNary)
        # at the top level, compound geometries carry frame metadata
        # and the usual region tail in addition to their operands.
        compoundGeoPhrase = (
            _compoundGeoOperatorUnary("type")
                + _commonRegionItems
                + _compoundGeoOperandsUnary("children")
                + _regionTail
            | _compoundGeoOperatorBinary("type")
                + _commonRegionItems
                + _compoundGeoOperandsBinary("children")
                + _regionTail
            | _compoundGeoOperatorNary("type")
                + _commonRegionItems
                - _compoundGeoOperandsNary("children")
                + _regionTail)

        # space subphrase
        positionInterval = (CaselessKeyword("PositionInterval")("type")
            + _commonRegionItems
            + _coos
            + _regionTail)
        position = (CaselessKeyword("Position")("type")
            + _commonSpaceItems
            + _pos
            + _spatialTail)
        spaceSubPhrase = (positionInterval
            | position
            | atomicGeometry
            | compoundGeoPhrase).addParseAction(makeTree)

        # spectral subphrase
        spectralSpec = (Suppress(CaselessKeyword("Spectral")) + number)("pos")
        _spectralTail = Optional(spectralUnit) + cooProps
        spectralInterval = (CaselessKeyword("SpectralInterval")("type")
            + Optional(fillfactor)
            + Optional(refpos)
            + _coos
            + Optional(spectralSpec)
            + _spectralTail)
        spectral = (CaselessKeyword("Spectral")("type")
            + Optional(refpos)
            + _pos
            + _spectralTail)
        spectralSubPhrase = (spectralInterval
            | spectral).addParseAction(makeTree)

        # redshift subphrase
        redshiftType = Regex("VELOCITY|REDSHIFT")("redshiftType")
        redshiftSpec = (Suppress(CaselessKeyword("Redshift")) + number)("pos")
        dopplerdef = Regex("OPTICAL|RADIO|RELATIVISTIC")("dopplerdef")
        _redshiftTail = Optional(redshiftUnit) + cooProps
        redshiftInterval = (CaselessKeyword("RedshiftInterval")("type")
            + Optional(fillfactor)
            + Optional(refpos)
            + Optional(redshiftType)
            + Optional(dopplerdef)
            + _coos
            + Optional(redshiftSpec)
            + _redshiftTail)
        redshift = (CaselessKeyword("Redshift")("type")
            + Optional(refpos)
            + Optional(redshiftType)
            + Optional(dopplerdef)
            + _pos
            + _redshiftTail)
        redshiftSubPhrase = (redshiftInterval
            | redshift).addParseAction(makeTree)

        # system subphrase (extension, see docs)
        # ids match Name from XML spec; we're not doing char refs and similar here
        xmlName = Word(alphas + "_:",
            alphanums + '.-_:').addParseAction(_stringify)
        systemDefinition = (Suppress(CaselessKeyword("System"))
            + xmlName("libSystem"))

        # top level
        # (all subphrases are optional, but their order is fixed)
        stcsPhrase = ( #noflake: stcsPhrase is returned through locals()
            Optional(timeSubPhrase)("time")
            + Optional(spaceSubPhrase) ("space")
            + Optional(spectralSubPhrase)("spectral")
            + Optional(redshiftSubPhrase)("redshift")
            + Optional(systemDefinition)) + StringEnd()

        return _makeSymDict(locals(), _exportAll)
def impl(cls):
    """builds the grammar for unit strings, stores its symbols in
    cls.symbols, and returns the root symbol input.

    A unit string is an optional scale factor (a float or a power of
    ten), optional whitespace, and a unit expression.  Unit expressions
    are products of factors joined by ".", optionally followed by one
    division.  A factor is a plain or single-quoted unit atom with an
    optional ** power, a function application, or a parenthesised
    expression.

    Whitespace is not skipped anywhere in this grammar.

    cls.symbols receives this function's local namespace, so the names
    assigned below are part of the interface; do not rename them.
    """
    from gavo.imp.pyparsing import (Word, Literal, Regex,
        Optional, ZeroOrMore, alphas,
        Suppress, Forward, White)

    with utils.pyparsingWhitechars(''):
        unit_atom = Word(alphas).addParseAction(UnitNode.fromToks)
        unit_atom.setName("atomic unit")
        quoted_unit_atom = ("'" + Word(alphas) + "'").addParseAction(
            QuotedUnitNode.fromToks)
        quoted_unit_atom.setName("quoted atomic unit")

        OPEN_P = Literal('(')
        CLOSE_P = Literal(')')
        SIGN = Literal('+') | Literal('-')
        FUNCTION_NAME = Word(alphas)
        UNSIGNED_INTEGER = Word("01234567890")
        SIGNED_INTEGER = SIGN + UNSIGNED_INTEGER
        FLOAT = Regex(r"[+-]?([0-9]+(\.[0-9]*)?)")
        # The dot after the leading zero must be escaped; unescaped, it
        # matched any character, so strings like 0x5 or 012 passed as
        # floats.
        VOFLOAT = Regex(r"0\.[0-9]+([eE][+-]?[0-9]+)?"
            r"|[1-9][0-9]*(\.[0-9]+)?([eE][+-]?[0-9]+)?")

        integer = SIGNED_INTEGER | UNSIGNED_INTEGER
        power_operator = Literal('**')
        multiplication_operator = Literal(".")
        division_operator = Literal("/")
        # NOTE(review): addParseAction modifies UNSIGNED_INTEGER in place,
        # so the "." is appended wherever UNSIGNED_INTEGER is used (i.e.,
        # also within integer), not just in the denominator.  Presumably
        # this is relied upon to make evalAll yield floats -- confirm
        # before changing.
        numeric_power = (
            integer
            | OPEN_P + integer + CLOSE_P
            | OPEN_P + FLOAT + CLOSE_P
            | OPEN_P + integer + '/'
                + UNSIGNED_INTEGER.addParseAction(lambda s, p, t: t[0] + ".")
                + CLOSE_P)
        numeric_power.setParseAction(evalAll)

        pow_10 = Literal("10") + power_operator + numeric_power
        scale_factor = (pow_10 | VOFLOAT).setParseAction(evalAll)

        any_unit_atom = unit_atom | quoted_unit_atom
        factor = (any_unit_atom
            + Optional(Suppress(power_operator) + numeric_power)
            ).addParseAction(Factor.fromToks)

        # complete_expression is recursive and filled in below
        complete_expression = Forward()
        function_application = (FUNCTION_NAME
            + Suppress(OPEN_P) + complete_expression + Suppress(CLOSE_P))
        function_application.addParseAction(FunctionApplication.fromToks)

        # ^ picks the longer match, so name(...) is taken as a function
        # application rather than as a unit atom followed by junk.
        unit_expression = (
            Suppress(OPEN_P) + complete_expression + Suppress(CLOSE_P)
            | (factor ^ function_application))

        product_of_units = (
            unit_expression
            + ZeroOrMore(multiplication_operator + unit_expression)
            ).setParseAction(_buildTerm)

        complete_expression << (
            product_of_units
            + Optional(division_operator + unit_expression))
        complete_expression.setParseAction(Term.fromToks)

        input = (Optional(scale_factor)
            + Optional(Suppress(White()))
            + complete_expression
            ).setParseAction(Expression.fromToks)

        cls.symbols = locals()
        return input