def parseQuery(self, query):
    # Lexical analysis.
    tokens = _tokenizer_regex.findall(query)
    self._tokens = tokens
    # Classify tokens.
    self._tokentypes = [_keywords.get(token.upper(), _ATOM)
                        for token in tokens]
    # Add _EOF.
    self._tokens.append(_EOF)
    self._tokentypes.append(_EOF)
    self._index = 0

    # Syntactical analysis.
    self._ignored = []  # Ignored words in the query, for parseQueryEx
    tree = self._parseOrExpr()
    self._require(_EOF)
    if tree is None:
        raise parsetree.ParseError(
            "Query contains only common words: %s" % repr(query))
    return tree
def _parseTerm(self):
    if self._check(_LPAREN):
        tree = self._parseOrExpr()
        self._require(_RPAREN)
    else:
        nodes = [self._parseAtom()]
        while self._peek(_ATOM):
            nodes.append(self._parseAtom())
        # Drop atoms that parsed to None (stopwords); list() keeps this
        # working under Python 3, where filter() returns an iterator.
        nodes = list(filter(None, nodes))
        if not nodes:
            return None  # Only stopwords
        # Sort so NotNodes come after the positive nodes; the original
        # index keeps the relative order within each group.
        structure = [(isinstance(nodes[i], parsetree.NotNode), i, nodes[i])
                     for i in range(len(nodes))]
        structure.sort()
        nodes = [node for (_, _, node) in structure]
        if isinstance(nodes[0], parsetree.NotNode):
            raise parsetree.ParseError(
                "a term must have at least one positive word")
        if len(nodes) == 1:
            return nodes[0]
        tree = parsetree.AndNode(nodes)
    return tree
def _require(self, tokentype):
    if not self._check(tokentype):
        t = self._tokens[self._index]
        msg = "Token %r required, %r found" % (tokentype, t)
        raise parsetree.ParseError(msg)
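# A minimal usage sketch of the parser above. It assumes the surrounding
# QueryParser class and a `lexicon` object (both hypothetical here), and it
# assumes _parseAtom (not shown) maps a negated word to parsetree.NotNode:
#
#     parser = QueryParser(lexicon)
#     parser.parseQuery("cheese ham")   # two atoms -> parsetree.AndNode
#     parser.parseQuery("-ham")         # NotNode only -> ParseError:
#                                       # "a term must have at least one
#                                       #  positive word"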