示例#1
0
    def parseQuery(self, query):
        """Parse *query* into a parse tree.

        Raises parsetree.ParseError if the query reduces to nothing
        but common (stop) words.
        """
        # Lexical analysis: split the raw query text into tokens.
        self._tokens = _tokenizer_regex.findall(query)
        # Classify every token: operator keyword or plain atom.
        self._tokentypes = [
            _keywords.get(tok.upper(), _ATOM) for tok in self._tokens
        ]
        # Terminate both streams with an explicit end-of-input marker
        # so the recursive-descent parser can detect exhaustion.
        self._tokens.append(_EOF)
        self._tokentypes.append(_EOF)
        self._index = 0

        # Syntactical analysis.
        self._ignored = []  # Ignored words in the query, for parseQueryEx
        tree = self._parseOrExpr()
        self._require(_EOF)
        if tree is None:
            raise parsetree.ParseError(
                "Query contains only common words: %s" % repr(query))
        return tree
示例#2
0
 def _parseTerm(self):
     """Parse one term: a parenthesized expression, or a run of atoms.

     Returns a parse-tree node, or None when the term consisted only
     of stopwords.  Raises parsetree.ParseError when a term contains
     no positive (non-NOT) word.
     """
     if self._check(_LPAREN):
         tree = self._parseOrExpr()
         self._require(_RPAREN)
     else:
         nodes = [self._parseAtom()]
         while self._peek(_ATOM):
             nodes.append(self._parseAtom())
         # Drop stopwords (parsed as None).  Materialize the result:
         # on Python 3 filter() returns a lazy iterator, which is
         # always truthy and cannot be indexed or len()-ed below.
         nodes = list(filter(None, nodes))
         if not nodes:
             return None  # Only stopwords
         # Stable-sort NOT nodes to the end; the index i breaks ties
         # so the parse-tree nodes themselves are never compared.
         structure = [(isinstance(nodes[i], parsetree.NotNode), i, nodes[i])
                      for i in range(len(nodes))]
         structure.sort()
         nodes = [node for (_, index, node) in structure]
         if isinstance(nodes[0], parsetree.NotNode):
             raise parsetree.ParseError(
                 "a term must have at least one positive word")
         if len(nodes) == 1:
             return nodes[0]
         tree = parsetree.AndNode(nodes)
     return tree
示例#3
0
 def _require(self, tokentype):
     """Consume the next token, which must be of *tokentype*;
     raise parsetree.ParseError otherwise."""
     if self._check(tokentype):
         return
     found = self._tokens[self._index]
     raise parsetree.ParseError(
         "Token %r required, %r found" % (tokentype, found))