def _make_simple_parser():
    """Build the parse function for the simple "+term -term" query syntax.

    The grammar recognises bare words, double-quoted phrases, an optional
    ``field:`` prefix on either, and a leading ``-`` or ``+`` on a unit.
    Matched pieces are grouped under the results names "Word", "Quotes",
    "Field", "Not", "Required" and "Toplevel".

    Returns the bound ``parseString`` method of the top-level element.
    """

    def tagged(name, element):
        # Wrap an element in a Group and attach a results name to it.
        return Group(element).setResultsName(name)

    # Kept as the first statement so the elements built below are created
    # after this class-level setting is applied (note the apostrophe is
    # included in the whitespace set).
    ParserElement.setDefaultWhitespaceChars(" \n\t\r'")

    # Every printable character except the ones the grammar uses as syntax.
    word_chars = "".join(ch for ch in printables if ch not in ':+-"')
    dotted_word = Combine(Word(word_chars) + ZeroOrMore("." + Word(word_chars)))

    # Leaf terms: a bare word or a double-quoted phrase.
    word = tagged("Word", dotted_word)
    phrase = tagged("Quotes", Suppress('"') + CharsNotIn('"') + Suppress('"'))

    # A term, optionally prefixed with "fieldname:".
    bare_unit = phrase | word
    fielded_unit = tagged("Field", Word(alphanums) + Suppress(':') + bare_unit)
    unit = fielded_unit | bare_unit

    # "-unit" marks the unit as excluded, "+unit" as required.
    excluded = tagged("Not", Suppress(Literal("-")) + unit)
    required = tagged("Required", Suppress(Literal("+")) + unit)
    any_unit = excluded | required | unit

    # A query is any number of units (possibly none), then end of input.
    query = OneOrMore(any_unit) | Empty()
    top = tagged("Toplevel", query) + StringEnd()
    return top.parseString
def _make_default_parser():
    """Build the parse function for the default query syntax.

    The grammar recognises plain words, prefix words (``render*``),
    wildcard words containing ``*`` or ``?``, ``a..b`` ranges, double-quoted
    phrases, parenthesised sub-expressions, ``^number`` boosts, ``field:``
    prefixes, and the case-insensitive keywords ``not``, ``and`` and ``or``.
    Matched pieces are grouped under the results names "Word", "Prefix",
    "Wildcard", "Range", "Quotes", "Group", "Boost", "Field", "Not", "And",
    "Or" and "Toplevel".

    Returns the bound ``parseString`` method of the top-level element.
    """

    def tagged(name, element):
        # Wrap an element in a Group and attach a results name to it.
        return Group(element).setResultsName(name)

    # Kept as the first statement so the elements built below are created
    # after this class-level setting is applied (note the apostrophe is
    # included in the whitespace set).
    ParserElement.setDefaultWhitespaceChars(" \n\t\r'")

    word_token = Regex(r"(\w|/)+(\.?(\w|\-|/)+)*", re.UNICODE)

    # Word-like terms.
    plain_word = tagged("Word", word_token)
    prefix_word = tagged("Prefix", Combine(word_token + Suppress('*')))
    wildcard = tagged("Wildcard", Regex(r"\w*(?:[\?\*]\w*)+"))
    term_range = tagged("Range", plain_word + Suppress("..") + plain_word)
    # Alternation order matters: the longer forms are tried before a
    # plain word.
    any_word = term_range | prefix_word | wildcard | plain_word

    phrase = tagged("Quotes", Suppress('"') + CharsNotIn('"') + Suppress('"'))

    # Forward declaration: the grammar is recursive through parentheses
    # and through the right-hand side of and/or.
    expression = Forward()
    grouped = tagged("Group", Suppress("(") + expression + Suppress(")"))

    # A phrase or word may carry a "^<number>" boost.
    boostable = phrase | any_word
    boosted = tagged(
        "Boost",
        boostable + Suppress("^") + Word("0123456789", ".0123456789"),
    )

    # Any of the above may be restricted to a field with "name:".
    fieldable = grouped | boosted | boostable
    fielded = tagged("Field", Word(alphanums) + Suppress(':') + fieldable)
    unit = fielded | fieldable

    # Unary "not".
    negated = tagged("Not", Suppress(Keyword("not", caseless=True)) + unit)
    any_unit = negated | unit

    # Binary operators; the right operand is a full expression.
    and_keyword = Keyword("and", caseless=True)
    or_keyword = Keyword("or", caseless=True)
    conjunction = tagged("And", any_unit + Suppress(and_keyword) + expression)
    disjunction = tagged("Or", any_unit + Suppress(or_keyword) + expression)

    expression << (OneOrMore(conjunction | disjunction | any_unit) | Empty())

    top = tagged("Toplevel", expression) + StringEnd()
    return top.parseString
def _make_simple_parser():
    """Build the parse function for the simple "+term -term" query syntax.

    Words are matched with a regular expression (word characters and ``/``,
    with optional inner ``.`` and ``-``). The grammar also recognises
    double-quoted phrases, an optional ``field:`` prefix, and a leading
    ``-`` or ``+`` on a unit. Matched pieces are grouped under the results
    names "Word", "Quotes", "Field", "Not", "Required" and "Toplevel".

    Returns the bound ``parseString`` method of the top-level element.
    """

    def tagged(name, element):
        # Wrap an element in a Group and attach a results name to it.
        return Group(element).setResultsName(name)

    # Kept as the first statement so the elements built below are created
    # after this class-level setting is applied (note the apostrophe is
    # included in the whitespace set).
    ParserElement.setDefaultWhitespaceChars(" \n\t\r'")

    word_token = Regex(r"(\w|/)+(\.?(\w|\-|/)+)*", re.UNICODE)

    # Leaf terms: a bare word or a double-quoted phrase.
    word = tagged("Word", word_token)
    phrase = tagged("Quotes", Suppress('"') + CharsNotIn('"') + Suppress('"'))

    # A term, optionally prefixed with "fieldname:".
    bare_unit = phrase | word
    fielded_unit = tagged("Field", Word(alphanums) + Suppress(':') + bare_unit)
    unit = fielded_unit | bare_unit

    # "-unit" marks the unit as excluded, "+unit" as required.
    excluded = tagged("Not", Suppress(Literal("-")) + unit)
    required = tagged("Required", Suppress(Literal("+")) + unit)
    any_unit = excluded | required | unit

    # A query is any number of units (possibly none), then end of input.
    query = OneOrMore(any_unit) | Empty()
    top = tagged("Toplevel", query) + StringEnd()
    return top.parseString
def _make_default_parser():
    """Build the parse function for the default query syntax.

    The grammar recognises words (with backslash escapes), wildcard words
    containing ``*`` or ``?``, fenced ranges such as ``[a TO b]`` or
    ``{a TO}``, double-quoted phrases, parenthesised sub-expressions,
    ``^number`` boosts, ``field:`` prefixes, a case-insensitive ``not``
    keyword, and the case-sensitive keywords ``AND``, ``OR`` and
    ``ANDNOT``. Whitespace around the operators is matched explicitly with
    ``White()``. Matched pieces are grouped under the results names "Word",
    "Wildcard", "Range", "Quotes", "Group", "Boost", "Field", "Not", "And",
    "Or", "AndNot" and "Toplevel".

    Returns the bound ``parseString`` method of the top-level element.
    """

    def tagged(name, element):
        # Wrap an element in a Group and attach a results name to it.
        return Group(element).setResultsName(name)

    escape_char = "\\"

    # A word is a run of non-syntax characters and/or escaped characters;
    # the escape character itself is dropped from the combined token.
    word_text = CharsNotIn('\\*?^():"{}[] ')
    escaped = Suppress(escape_char) + (Word(printables, exact=1) | White(exact=1))
    word_token = Combine(OneOrMore(word_text | escaped))

    plain_word = tagged("Word", word_token)

    # Wildcards: either word text followed by wildcard characters mixed
    # with more word text...
    wild_chars = Word("?*")
    wild_mixed = word_token + OneOrMore(wild_chars + Optional(word_token))
    # ...or leading wildcard characters, followed by mixed word/wildcard
    # text or immediately by whitespace / end of input.
    wild_tail = OneOrMore(word_token + Optional(wild_chars))
    wild_alone = FollowedBy(White() | StringEnd())
    wild_start = wild_chars + (wild_tail | wild_alone)
    wildcard = tagged("Wildcard", Combine(wild_mixed | wild_start))

    # Ranges: an opening fence, one or two endpoints around "TO", and a
    # closing fence. A missing endpoint is represented by an empty group.
    open_fence = Literal("[") | Literal("{")
    close_fence = Literal("]") | Literal("}")
    range_item = QuotedString('"') | word_token
    both_ends = (
        Group(range_item)
        + Suppress(White() + Keyword("TO") + White())
        + Group(range_item)
    )
    end_only = Group(Empty()) + Suppress(Keyword("TO") + White()) + Group(range_item)
    start_only = Group(range_item) + Suppress(White() + Keyword("TO")) + Group(Empty())
    term_range = tagged(
        "Range",
        open_fence + (both_ends | end_only | start_only) + close_fence,
    )

    any_word = term_range | wildcard | plain_word

    phrase = tagged("Quotes", QuotedString('"'))

    # Forward declaration: the grammar is recursive through parentheses
    # and through the right-hand side of AND/OR.
    expression = Forward()
    grouped = tagged("Group", Suppress("(") + expression + Suppress(")"))

    # A word or phrase may carry a "^<number>" boost.
    boostable = any_word | phrase
    boosted = tagged(
        "Boost",
        boostable + Suppress("^") + Word("0123456789", ".0123456789"),
    )

    # Any of the above may be restricted to a field with "name:".
    fieldable = grouped | boosted | boostable
    fielded = tagged("Field", Word(alphanums + "_") + Suppress(':') + fieldable)
    unit = fielded | fieldable

    # Unary "not", which must be followed by whitespace.
    negated = tagged(
        "Not",
        Suppress(Keyword("not", caseless=True)) + Suppress(White()) + unit,
    )
    any_unit = negated | unit

    # Binary operators, each surrounded by explicit whitespace.
    and_keyword = Keyword("AND", caseless=False)
    or_keyword = Keyword("OR", caseless=False)
    and_not_keyword = Keyword("ANDNOT", caseless=False)

    conjunction = tagged(
        "And",
        any_unit + Suppress(White()) + Suppress(and_keyword) + Suppress(White())
        + expression,
    )
    disjunction = tagged(
        "Or",
        any_unit + Suppress(White()) + Suppress(or_keyword) + Suppress(White())
        + expression,
    )
    # ANDNOT takes a single unit on each side rather than a full expression.
    exclusion = tagged(
        "AndNot",
        unit + Suppress(White()) + Suppress(and_not_keyword) + Suppress(White())
        + unit,
    )

    expression << (
        OneOrMore(conjunction | disjunction | exclusion | any_unit | Suppress(White()))
        | Empty()
    )

    top = tagged("Toplevel", expression) + StringEnd()
    return top.parseString
def _make_default_parser():
    """Build the parse function for the default query syntax.

    The grammar recognises words (with backslash escapes and inner dots),
    prefix words (``render*``), wildcard words containing ``*`` or ``?``,
    ``a..b`` ranges, double-quoted phrases, parenthesised sub-expressions,
    ``^number`` boosts, ``field:`` prefixes, and the case-insensitive
    keywords ``not``, ``and`` and ``or``. Whitespace around the operators
    is matched explicitly with ``White()``. Matched pieces are grouped
    under the results names "Word", "Prefix", "Wildcard", "Range",
    "Quotes", "Group", "Boost", "Field", "Not", "And", "Or" and "Toplevel".

    Returns the bound ``parseString`` method of the top-level element.
    """

    def tagged(name, element):
        # Wrap an element in a Group and attach a results name to it.
        return Group(element).setResultsName(name)

    escape_char = "\\"

    # Every printable character except the ones the grammar uses as syntax.
    syntax_chars = '*?^():."' + escape_char
    word_chars = "".join(ch for ch in printables if ch not in syntax_chars)

    # A word is a run of dotted text and/or escaped characters; the escape
    # character itself is dropped from the combined token.
    word_text = Combine(Word(word_chars) + ZeroOrMore("." + Word(word_chars)))
    escaped = Suppress(escape_char) + (Word(printables, exact=1) | White(exact=1))
    word_token = Combine(OneOrMore(word_text | escaped))

    # Word-like terms.
    plain_word = tagged("Word", word_token)
    prefix_word = tagged("Prefix", Combine(word_token + Suppress('*')))
    wildcard = tagged("Wildcard", Regex(r"\w*(?:[\?\*]\w*)+"))
    range_separator = Suppress(Optional(White()) + ".." + Optional(White()))
    term_range = tagged("Range", plain_word + range_separator + plain_word)
    # Alternation order matters: the longer forms are tried before a
    # plain word.
    any_word = term_range | prefix_word | wildcard | plain_word

    phrase = tagged("Quotes", Suppress('"') + CharsNotIn('"') + Suppress('"'))

    # Forward declaration: the grammar is recursive through parentheses
    # and through the right-hand side of and/or.
    expression = Forward()
    grouped = tagged("Group", Suppress("(") + expression + Suppress(")"))

    # A phrase or word may carry a "^<number>" boost.
    boostable = phrase | any_word
    boosted = tagged(
        "Boost",
        boostable + Suppress("^") + Word("0123456789", ".0123456789"),
    )

    # Any of the above may be restricted to a field with "name:".
    fieldable = grouped | boosted | boostable
    fielded = tagged("Field", Word(alphanums + "_") + Suppress(':') + fieldable)
    unit = fielded | fieldable

    # Unary "not", which must be followed by whitespace.
    negated = tagged(
        "Not",
        Suppress(Keyword("not", caseless=True)) + Suppress(White()) + unit,
    )
    any_unit = negated | unit

    # Binary operators, each surrounded by explicit whitespace; the right
    # operand is a full expression.
    and_keyword = Keyword("and", caseless=True)
    or_keyword = Keyword("or", caseless=True)
    conjunction = tagged(
        "And",
        any_unit + Suppress(White()) + Suppress(and_keyword) + Suppress(White())
        + expression,
    )
    disjunction = tagged(
        "Or",
        any_unit + Suppress(White()) + Suppress(or_keyword) + Suppress(White())
        + expression,
    )

    expression << (
        OneOrMore(conjunction | disjunction | any_unit | Suppress(White()))
        | Empty()
    )

    top = tagged("Toplevel", expression) + StringEnd()
    return top.parseString