Exemplo n.º 1
0
def _make_simple_parser():
    ParserElement.setDefaultWhitespaceChars(" \n\t\r'")
    
    wordchars = printables
    for specialchar in ':+-"':
        wordchars = wordchars.replace(specialchar, "")
    
    wordtoken = Combine(Word(wordchars) + ZeroOrMore("." + Word(wordchars)))
    
    # A word-like thing
    generalWord = Group(wordtoken).setResultsName("Word")
    
    # A quoted phrase
    quotedPhrase = Group(Suppress('"') + CharsNotIn('"') + Suppress('"')).setResultsName("Quotes")
    
    # Units of content
    fieldableUnit = quotedPhrase | generalWord
    fieldedUnit = Group(Word(alphanums) + Suppress(':') + fieldableUnit).setResultsName("Field")
    unit = fieldedUnit | fieldableUnit

    # A unit may be "not"-ed.
    operatorNot = Group(Suppress(Literal("-")) + unit).setResultsName("Not")
    
    # A unit may be required
    operatorReqd = Group(Suppress(Literal("+")) + unit).setResultsName("Required")
    
    generalUnit = operatorNot | operatorReqd | unit

    expression = (OneOrMore(generalUnit) | Empty())
    toplevel = Group(expression).setResultsName("Toplevel") + StringEnd()
    
    return toplevel.parseString
Exemplo n.º 2
0
def _make_simple_parser():
    ParserElement.setDefaultWhitespaceChars(" \n\t\r'")

    wordToken = Regex(r"(\w|/)+(\.?(\w|\-|/)+)*", re.UNICODE)

    # A word-like thing
    generalWord = Group(wordToken).setResultsName("Word")

    # A quoted phrase
    quotedPhrase = Group(Suppress('"') + CharsNotIn('"') +
                         Suppress('"')).setResultsName("Quotes")

    # Units of content
    fieldableUnit = quotedPhrase | generalWord
    fieldedUnit = Group(Word(alphanums) + Suppress(':') +
                        fieldableUnit).setResultsName("Field")
    unit = fieldedUnit | fieldableUnit

    # A unit may be "not"-ed.
    operatorNot = Group(Suppress(Literal("-")) + unit).setResultsName("Not")

    # A unit may be required
    operatorReqd = Group(Suppress(Literal("+")) +
                         unit).setResultsName("Required")

    generalUnit = operatorNot | operatorReqd | unit

    expression = (OneOrMore(generalUnit) | Empty())
    toplevel = Group(expression).setResultsName("Toplevel") + StringEnd()

    return toplevel.parseString
Exemplo n.º 3
0
def _make_default_parser():
    ParserElement.setDefaultWhitespaceChars(" \n\t\r'")
    
    #wordToken = Word(self.wordChars)
    wordToken = Regex(r"(\w|/)+(\.?(\w|\-|/)+)*", re.UNICODE)
    
    # A plain old word.
    plainWord = Group(wordToken).setResultsName("Word")
    
    # A word ending in a star (e.g. 'render*'), indicating that
    # the search should do prefix expansion.
    prefixWord = Group(Combine(wordToken + Suppress('*'))).setResultsName("Prefix")
    
    # A wildcard word containing * or ?.
    wildcard = Group(Regex(r"\w*(?:[\?\*]\w*)+")).setResultsName("Wildcard")
    
    # A range of terms
    range = Group(plainWord + Suppress("..") + plainWord).setResultsName("Range")
    
    # A word-like thing
    generalWord = range | prefixWord | wildcard | plainWord
    
    # A quoted phrase
    quotedPhrase = Group(Suppress('"') + CharsNotIn('"') + Suppress('"')).setResultsName("Quotes")
    
    expression = Forward()
    
    # Parentheses can enclose (group) any expression
    parenthetical = Group((Suppress("(") + expression + Suppress(")"))).setResultsName("Group")

    boostableUnit = quotedPhrase | generalWord
    boostedUnit = Group(boostableUnit + Suppress("^") + Word("0123456789", ".0123456789")).setResultsName("Boost")

    # The user can flag that a parenthetical group, quoted phrase, or word
    # should be searched in a particular field by prepending 'fn:', where fn is
    # the name of the field.
    fieldableUnit = parenthetical | boostedUnit | boostableUnit
    fieldedUnit = Group(Word(alphanums) + Suppress(':') + fieldableUnit).setResultsName("Field")
    
    # Units of content
    unit = fieldedUnit | fieldableUnit

    # A unit may be "not"-ed.
    operatorNot = Group(Suppress(Keyword("not", caseless=True)) + unit).setResultsName("Not")
    generalUnit = operatorNot | unit

    andToken = Keyword("and", caseless=True)
    orToken = Keyword("or", caseless=True)
    
    operatorAnd = Group(generalUnit + Suppress(andToken) + expression).setResultsName("And")
    operatorOr = Group(generalUnit + Suppress(orToken) + expression).setResultsName("Or")

    expression << (OneOrMore(operatorAnd | operatorOr | generalUnit) | Empty())
    
    toplevel = Group(expression).setResultsName("Toplevel") + StringEnd()
    
    return toplevel.parseString
Exemplo n.º 4
0
def _make_default_parser():
    escapechar = "\\"
    
    #wordchars = printables
    #for specialchar in '*?^():"{}[] ' + escapechar:
    #    wordchars = wordchars.replace(specialchar, "")
    #wordtext = Word(wordchars)
    
    wordtext = CharsNotIn('\\*?^():"{}[] ')
    escape = Suppress(escapechar) + (Word(printables, exact=1) | White(exact=1))
    wordtoken = Combine(OneOrMore(wordtext | escape))
    
    # A plain old word.
    plainWord = Group(wordtoken).setResultsName("Word")
    
    # A wildcard word containing * or ?.
    wildchars = Word("?*")
    # Start with word chars and then have wild chars mixed in
    wildmixed = wordtoken + OneOrMore(wildchars + Optional(wordtoken))
    # Or, start with wildchars, and then either a mixture of word and wild chars, or the next token
    wildstart = wildchars + (OneOrMore(wordtoken + Optional(wildchars)) | FollowedBy(White() | StringEnd()))
    wildcard = Group(Combine(wildmixed | wildstart)).setResultsName("Wildcard")
    
    # A range of terms
    startfence = Literal("[") | Literal("{")
    endfence = Literal("]") | Literal("}")
    rangeitem = QuotedString('"') | wordtoken
    openstartrange = Group(Empty()) + Suppress(Keyword("TO") + White()) + Group(rangeitem)
    openendrange = Group(rangeitem) + Suppress(White() + Keyword("TO")) + Group(Empty())
    normalrange = Group(rangeitem) + Suppress(White() + Keyword("TO") + White()) + Group(rangeitem)
    range = Group(startfence + (normalrange | openstartrange | openendrange) + endfence).setResultsName("Range")
    
#    rangeitem = QuotedString('"') | wordtoken
#    rangestartitem = Group((rangeitem + Suppress(White())) | Empty()).setResultsName("rangestart")
#    rangeenditem = Group((Suppress(White()) + rangeitem) | Empty()).setResultsName("rangeend")
#    rangestart = (Literal("{") | Literal("[")) + rangestartitem
#    rangeend = rangeenditem + (Literal("}") | Literal("]"))
#    range =  Group(rangestart + Suppress(Literal("TO")) + rangeend).setResultsName("Range")
    
    # A word-like thing
    generalWord = range | wildcard | plainWord
    
    # A quoted phrase
    quotedPhrase = Group(QuotedString('"')).setResultsName("Quotes")
    
    expression = Forward()
    
    # Parentheses can enclose (group) any expression
    parenthetical = Group((Suppress("(") + expression + Suppress(")"))).setResultsName("Group")

    boostableUnit = generalWord | quotedPhrase
    boostedUnit = Group(boostableUnit + Suppress("^") + Word("0123456789", ".0123456789")).setResultsName("Boost")

    # The user can flag that a parenthetical group, quoted phrase, or word
    # should be searched in a particular field by prepending 'fn:', where fn is
    # the name of the field.
    fieldableUnit = parenthetical | boostedUnit | boostableUnit
    fieldedUnit = Group(Word(alphanums + "_") + Suppress(':') + fieldableUnit).setResultsName("Field")
    
    # Units of content
    unit = fieldedUnit | fieldableUnit

    # A unit may be "not"-ed.
    operatorNot = Group(Suppress(Keyword("not", caseless=True)) +  Suppress(White()) + unit).setResultsName("Not")
    generalUnit = operatorNot | unit

    andToken = Keyword("AND", caseless=False)
    orToken = Keyword("OR", caseless=False)
    andNotToken = Keyword("ANDNOT", caseless=False)
    
    operatorAnd = Group(generalUnit +  Suppress(White()) + Suppress(andToken) +  Suppress(White()) + expression).setResultsName("And")
    operatorOr = Group(generalUnit +  Suppress(White()) + Suppress(orToken) +  Suppress(White()) + expression).setResultsName("Or")
    operatorAndNot = Group(unit + Suppress(White()) + Suppress(andNotToken) + Suppress(White()) + unit).setResultsName("AndNot")

    expression << (OneOrMore(operatorAnd | operatorOr | operatorAndNot | generalUnit | Suppress(White())) | Empty())
    
    toplevel = Group(expression).setResultsName("Toplevel") + StringEnd()
    
    return toplevel.parseString
Exemplo n.º 5
0
def _make_default_parser():
    escapechar = "\\"

    wordchars = printables
    for specialchar in '*?^():."' + escapechar:
        wordchars = wordchars.replace(specialchar, "")

    wordtext = Combine(Word(wordchars) + ZeroOrMore("." + Word(wordchars)))
    escape = Suppress(escapechar) + (Word(printables, exact=1)
                                     | White(exact=1))
    wordtoken = Combine(OneOrMore(wordtext | escape))

    # A plain old word.
    plainWord = Group(wordtoken).setResultsName("Word")

    # A word ending in a star (e.g. 'render*'), indicating that
    # the search should do prefix expansion.
    prefixWord = Group(Combine(wordtoken +
                               Suppress('*'))).setResultsName("Prefix")

    # A wildcard word containing * or ?.
    wildcard = Group(Regex(r"\w*(?:[\?\*]\w*)+")).setResultsName("Wildcard")

    # A range of terms
    range = Group(plainWord +
                  Suppress(Optional(White()) + ".." + Optional(White())) +
                  plainWord).setResultsName("Range")

    # A word-like thing
    generalWord = range | prefixWord | wildcard | plainWord

    # A quoted phrase
    quotedPhrase = Group(Suppress('"') + CharsNotIn('"') +
                         Suppress('"')).setResultsName("Quotes")

    expression = Forward()

    # Parentheses can enclose (group) any expression
    parenthetical = Group(
        (Suppress("(") + expression + Suppress(")"))).setResultsName("Group")

    boostableUnit = quotedPhrase | generalWord
    boostedUnit = Group(boostableUnit + Suppress("^") +
                        Word("0123456789", ".0123456789")).setResultsName(
                            "Boost")

    # The user can flag that a parenthetical group, quoted phrase, or word
    # should be searched in a particular field by prepending 'fn:', where fn is
    # the name of the field.
    fieldableUnit = parenthetical | boostedUnit | boostableUnit
    fieldedUnit = Group(Word(alphanums + "_") + Suppress(':') +
                        fieldableUnit).setResultsName("Field")

    # Units of content
    unit = fieldedUnit | fieldableUnit

    # A unit may be "not"-ed.
    operatorNot = Group(
        Suppress(Keyword("not", caseless=True)) + Suppress(White()) +
        unit).setResultsName("Not")
    generalUnit = operatorNot | unit

    andToken = Keyword("and", caseless=True)
    orToken = Keyword("or", caseless=True)

    operatorAnd = Group(generalUnit + Suppress(White()) + Suppress(andToken) +
                        Suppress(White()) + expression).setResultsName("And")
    operatorOr = Group(generalUnit + Suppress(White()) + Suppress(orToken) +
                       Suppress(White()) + expression).setResultsName("Or")

    expression << (OneOrMore(operatorAnd | operatorOr | generalUnit
                             | Suppress(White())) | Empty())

    toplevel = Group(expression).setResultsName("Toplevel") + StringEnd()

    return toplevel.parseString