def _get_xquery(catalog, query=None, **kw):
    """Build the xapian query for a search on `catalog`.

    The search is described either by `query` (handed over to
    `catalog._query2xquery`) or by keyword parameters (field name => value).
    `query` takes precedence when both are given; when neither is given the
    "everything" query is returned.
    """
    # An explicit query wins over any keyword parameter
    if query is not None:
        return catalog._query2xquery(query)

    # Nothing has been specified: return everything
    if not kw:
        return Query('')

    # One phrase query per keyword parameter, all AND-ed together
    known = catalog._metadata
    field_defs = catalog._fields
    parts = []
    for field_name, field_value in kw.iteritems():
        if field_name not in known:
            # The field is not indexed yet: warn and return nothing
            warn_not_indexed(field_name)
            return Query()
        field_info = known[field_name]
        term_prefix = field_info['prefix']
        field_cls = _get_field_cls(field_name, field_defs, field_info)
        parts.append(_make_PhraseQuery(field_cls, field_value, term_prefix))
    return Query(OP_AND, parts)
def to_query(self, query, default_op=Query.OP_AND):
    """Coerce `query` into a Query object.

    - a list is converted item by item and the results are combined with
      `default_op`;
    - a string is wrapped in a Query;
    - a Query is returned unchanged;
    - anything else gives None.

    NOTE: `default_op` is not forwarded to the recursive calls, so nested
    lists are always combined with this method's default operator.
    """
    if isinstance(query, list):
        converted = [self.to_query(item) for item in query]
        return Query(default_op, converted)
    if isinstance(query, basestring):
        return Query(query)
    if isinstance(query, Query):
        return query
    # Unsupported type
    return None
def _parse_query_term(self, name, prefix, value):
    """Build the Query for one term.

    A list of values becomes the OR of the queries built for each item.
    A single value is converted with str() and prepended with
    `_PREFIX_FULL_VALUE + prefix` when a prefix is given, or with
    `_PREFIX_NONE` otherwise.
    """
    if isinstance(value, list):
        alternatives = []
        for item in value:
            alternatives.append(self._parse_query_term(name, prefix, item))
        return Query(Query.OP_OR, alternatives)

    if prefix:
        lead = _PREFIX_FULL_VALUE + prefix
    else:
        lead = _PREFIX_NONE
    return Query(lead + str(value))
def __init__(self, *args, **kwargs):
    """Create the query, encoding unicode terms to byte strings first.

    Positional arguments are forwarded to Query.__init__; unicode terms
    (given directly, or inside a list or tuple) are encoded beforehand.
    Items that are not unicode are passed through untouched.

    @keyword encoding: specify the encoding manually (default: value of
                       config.charset)
    """
    # 'encoding' is consumed here so that it is not forwarded to
    # Query.__init__ as an unexpected keyword argument.
    self.encoding = kwargs.pop('encoding', config.charset)

    nargs = []
    for term in args:
        if isinstance(term, unicode):
            term = term.encode(self.encoding)
        elif isinstance(term, (list, tuple)):
            # Only unicode items are encoded: a sequence may also hold
            # byte strings or other objects, which have no use for (or
            # would break on) .encode()
            term = [t.encode(self.encoding) if isinstance(t, unicode) else t
                    for t in term]
        nargs.append(term)

    Query.__init__(self, *nargs, **kwargs)
def _parse_query_value_range(self, name, info, value):
    """Build a value-range Query from a (start, end) pair.

    Both bounds are converted with `self._convert_value` and the range
    applies to the value number found in `info['number']`.

    Raises TypeError when `value` does not hold exactly two items.
    """
    if len(value) != 2:
        raise TypeError('Only tuples of size 2 have a defined meaning. '
                        'Did you mean to pass a list instead?')

    lower, upper = value
    slot = info['number']
    lower = self._convert_value(info, lower)
    upper = self._convert_value(info, upper)
    return Query(Query.OP_VALUE_RANGE, slot, lower, upper)
def _parse_query_xapian(self, query_str):
    """Parse `query_str` with the xapian query parser.

    Phrase, boolean, love/hate and wildcard syntax are enabled and no
    default prefix is used.  When the string cannot be parsed, a warning
    is logged and an empty Query() is returned instead.
    """
    flags = (QueryParser.FLAG_PHRASE
             | QueryParser.FLAG_BOOLEAN
             | QueryParser.FLAG_LOVEHATE
             | QueryParser.FLAG_WILDCARD)
    try:
        return xapian.QueryParser.parse_query(self, query_str, flags, '')
    except xapian.QueryParserError as error:
        message = 'Invalid query string: ' + error.get_msg()
        logging.warning(message)
        return Query()
def phrase(terms, prefix=None, language=None, window=3):
    """Build an OP_PHRASE Query from `terms`.

    terms    -- a list of terms, or a string that is split on whitespace
    prefix   -- when given, every term becomes 'prefix:term'
    language -- when given, every term is stemmed with xapian.Stem(language)
                and prepended with 'Z'
    window   -- passed as is to the Query constructor
    """
    if isinstance(terms, basestring):
        terms = terms.split()
    if prefix:
        terms = ['%s:%s' % (prefix, t) for t in terms]
    if language:
        # NOTE(review): stemming is applied after prefixing, so the stemmer
        # is fed the 'prefix:term' form -- confirm this is intended.
        stem = xapian.Stem(language)
        terms = ['Z%s' % stem(t) for t in terms]
    # (a leftover debugging "print terms" used to pollute stdout here)
    return Query(Query.OP_PHRASE, terms, window)
def parse_query(self, query_dict, query_string):
    """Combine a free-text query string and a dict of criteria into a Query.

    `query_string`, unless None, is parsed by `_parse_query_xapian`.  Each
    key of `query_dict` is looked up in `_QUERY_TERM_MAP`, then in
    `_QUERY_VALUE_MAP`, and handled by the matching parser; unknown keys
    are logged and skipped.  All the resulting sub-queries are AND-ed; if
    there is none, Query('') is used as the only sub-query.
    """
    logging.debug('parse_query %r %r', query_dict, query_string)

    parts = []
    criteria = dict(query_dict)

    if query_string is not None:
        parts.append(self._parse_query_xapian(str(query_string)))

    for key, wanted in criteria.items():
        if key in _QUERY_TERM_MAP:
            part = self._parse_query_term(key, _QUERY_TERM_MAP[key], wanted)
        elif key in _QUERY_VALUE_MAP:
            part = self._parse_query_value(key, _QUERY_VALUE_MAP[key],
                                           wanted)
        else:
            logging.warning('Unknown term: %r=%r', key, wanted)
            continue
        parts.append(part)

    if not parts:
        parts.append(Query(''))

    logging.debug('queries: %r', [str(q) for q in parts])
    return Query(Query.OP_AND, parts)
def _make_PhraseQuery(field_cls, value, prefix):
    """Build the OP_PHRASE query matching `value` in a field.

    The value is indexed into a scratch document, then the terms of that
    document are read back ordered by position to make the phrase.
    """
    # XXX It's too complex (slow), we must use xapian
    #     Problem => _index_cjk
    scratch = Document()
    # XXX Language = 'en' by default
    _index(scratch, field_cls, value, prefix, 'en')

    # Collect (position, term) pairs so that sorting restores word order
    positioned = []
    for item in scratch:
        term = item.term
        positioned.extend((position, term) for position in item.positer)

    # Make the query
    return Query(OP_PHRASE, [term for _, term in sorted(positioned)])
def _parse_query_value(self, name, info, value):
    """Build the Query for a criterion on a stored value.

    Accepted forms of `value`:
    - list: OR of the queries built for each item;
    - tuple: a (start, end) range;
    - dict: {'start': ..., 'end': ...}, missing bounds default to 0 and
      sys.maxint (compatibility option for timestamp);
    - anything else: the range reduced to that single value.
    """
    if isinstance(value, list):
        alternatives = []
        for item in value:
            alternatives.append(self._parse_query_value(name, info, item))
        return Query(Query.OP_OR, alternatives)

    # Every other form boils down to a pair of bounds
    if isinstance(value, tuple):
        bounds = value
    elif isinstance(value, dict):
        bounds = (value.get('start', 0), value.get('end', sys.maxint))
    else:
        bounds = (value, value)
    return self._parse_query_value_range(name, info, bounds)
def elite(terms, prefix=None, window=10):
    """Build an OP_ELITE_SET Query from `terms`.

    terms  -- a list of terms, or a string that is split on whitespace
    prefix -- when given, every term becomes 'prefix:term'
    window -- passed as is to the Query constructor
    """
    words = terms.split() if isinstance(terms, basestring) else terms
    if prefix:
        words = ['%s:%s' % (prefix, word) for word in words]
    return Query(Query.OP_ELITE_SET, words, window)
def search(self, query=None, **kw):
    """Refine the current results and return them as new SearchResults.

    The new criteria (`query` or keyword parameters, see `_get_xquery`)
    are AND-ed with the query already held in `self._xquery`.
    """
    catalog = self._catalog
    refinement = _get_xquery(catalog, query, **kw)
    combined = Query(Query.OP_AND, [self._xquery, refinement])
    return SearchResults(catalog, combined)
def get_docs(db):
    """Return the list of documents matched by Query('') in `db`.

    A first, empty, match set is requested only to read the upper bound
    of the number of matches; that bound is then used as the size of the
    match set actually fetched.
    """
    enquire = Enquire(db)
    enquire.set_query(Query(''))
    upper_bound = enquire.get_mset(0, 0).get_matches_upper_bound()

    documents = []
    for match in enquire.get_mset(0, upper_bound):
        documents.append(match.get_document())
    return documents
def operator(self, query, op):
    """Return a copy of self whose query is `self.query <op> query`.

    `query` is first normalised with `querify`.
    """
    other = querify(query)
    combined = Query(op, self.query, other)
    return self.copy(query=combined)
def scale(self, factor):
    """Return a copy of self whose query is wrapped in OP_SCALE_WEIGHT
    with the given `factor`.
    """
    scaled = Query(Query.OP_SCALE_WEIGHT, self.query, factor)
    return self.copy(query=scaled)
def elite(db, terms, window=10, **kwargs):
    """Return a Search on `db` for the OP_ELITE_SET query made of `terms`.

    `window` is passed to the Query constructor; the extra keyword
    arguments are passed to Search.
    """
    elite_query = Query(Query.OP_ELITE_SET, terms, window)
    return Search(db, elite_query, **kwargs)
def phrase(db, terms, window=10, **kwargs):
    """Return a Search on `db` for the OP_PHRASE query made of `terms`.

    `window` is passed to the Query constructor; the extra keyword
    arguments are passed to Search.
    """
    phrase_query = Query(Query.OP_PHRASE, terms, window)
    return Search(db, phrase_query, **kwargs)
def search(self, query=None, **kw):
    """Refine the current results; return a new instance of the same class.

    The new criteria (`query` or keyword parameters, see `_get_xquery`)
    are AND-ed with the query already held in `self._xquery`.
    """
    refinement = _get_xquery(self._catalog, query, **kw)
    combined = Query(Query.OP_AND, [self._xquery, refinement])
    results_cls = self.__class__
    return results_cls(self._catalog, combined)
def _query2xquery(self, query):
    """Take an "itools" query and return the equivalent "xapian" query.

    Handled types: AllQuery, PhraseQuery, RangeQuery, StartQuery,
    TextQuery, _AndQuery, _OrQuery and NotQuery.  A query on a field that
    is missing from the catalog metadata triggers `warn_not_indexed` and
    gives the empty query.  Any other type of query gives None.

    Raises TypeError when the field name is not a `str` (or, for a
    TextQuery, when the value is not `unicode`), ValueError when the field
    has no prefix (not indexed) or when a range is asked on a multiple
    field, and AttributeError when the field has no value slot (not
    stored).
    """
    kind = type(query)
    all_fields = self._fields
    all_metadata = self._metadata

    # AllQuery: everything matches
    if kind is AllQuery:
        return Query('')

    # PhraseQuery: the field must be indexed
    if kind is PhraseQuery:
        field_name = query.name
        if type(field_name) is not str:
            raise TypeError("unexpected '%s'" % type(field_name))
        # Unknown field => an empty result
        if field_name not in all_metadata:
            warn_not_indexed(field_name)
            return Query()
        field_info = all_metadata[field_name]
        try:
            term_prefix = field_info['prefix']
        except KeyError:
            raise ValueError('the field "%s" must be indexed' % field_name)
        field_cls = _get_field_cls(field_name, all_fields, field_info)
        return _make_PhraseQuery(field_cls, query.value, term_prefix)

    # RangeQuery: the field must be stored
    if kind is RangeQuery:
        field_name = query.name
        if type(field_name) is not str:
            raise TypeError("unexpected '%s'" % type(field_name))
        # Unknown field => an empty result
        if field_name not in all_metadata:
            warn_not_indexed(field_name)
            return Query()
        field_info = all_metadata[field_name]
        slot = field_info.get('value')
        if slot is None:
            raise AttributeError(MSG_NOT_STORED.format(name=field_name))
        field_cls = _get_field_cls(field_name, all_fields, field_info)
        if field_cls.multiple:
            raise ValueError('range-query not supported on multiple fields')

        lower = query.left
        if lower is not None:
            lower = _encode_simple_value(field_cls, lower)
        upper = query.right
        if upper is not None:
            upper = _encode_simple_value(field_cls, upper)

        if lower is None:
            # No limit at all: return everything
            if upper is None:
                return Query('')
            # Right limit only
            return Query(OP_VALUE_LE, slot, upper)
        # Left limit only
        if upper is None:
            return Query(OP_VALUE_GE, slot, lower)
        # Both limits
        return Query(OP_VALUE_RANGE, slot, lower, upper)

    # StartQuery: the field must be stored
    if kind is StartQuery:
        field_name = query.name
        if type(field_name) is not str:
            raise TypeError("unexpected '%s'" % type(field_name))
        # Unknown field => an empty result
        if field_name not in all_metadata:
            warn_not_indexed(field_name)
            return Query()
        field_info = all_metadata[field_name]
        slot = field_info.get('value')
        if slot is None:
            raise AttributeError(MSG_NOT_STORED.format(name=field_name))
        field_cls = _get_field_cls(field_name, all_fields, field_info)

        start = _encode(field_cls, query.value)
        # An empty start matches everything
        if not start:
            return Query('')

        # at_or_after = {x / x >= start}
        at_or_after = Query(OP_VALUE_GE, slot, start)

        # Compute the word "after" start (toto => totp); the trailing
        # '\xff' characters are dropped first since they cannot be
        # incremented
        stop = start
        while stop and ord(stop[-1]) == 255:
            stop = stop[:-1]
        # Nothing left to increment: there is no upper limit
        if not stop:
            return at_or_after
        stop = stop[:-1] + chr(ord(stop[-1]) + 1)

        # beyond = {x / x >= stop}
        beyond = Query(OP_VALUE_GE, slot, stop)
        # Keep what is in at_or_after but not in beyond
        return Query(OP_AND_NOT, at_or_after, beyond)

    # TextQuery: the field must be indexed
    if kind is TextQuery:
        field_name = query.name
        if type(field_name) is not str:
            raise TypeError("unexpected %s for 'name'" % type(field_name))
        # Unknown field => an empty result
        if field_name not in all_metadata:
            warn_not_indexed(field_name)
            return Query()
        field_info = all_metadata[field_name]
        field_cls = _get_field_cls(field_name, all_fields, field_info)
        try:
            term_prefix = field_info['prefix']
        except KeyError:
            raise ValueError('the field "%s" must be indexed' % field_name)

        # Remove accents from the value
        text = query.value
        if type(text) is not unicode:
            raise TypeError("unexpected %s for 'value'" % type(text))
        text = text.translate(TRANSLATE_MAP)

        parser = QueryParser()
        parser.set_database(self._db)
        return parser.parse_query(_encode(field_cls, text), TQ_FLAGS,
                                  term_prefix)

    convert = self._query2xquery

    # Multiple query with a single atom: the atom alone is enough
    if isinstance(query, _MultipleQuery) and len(query.atoms) == 1:
        return convert(query.atoms[0])

    # And
    if kind is _AndQuery:
        return Query(OP_AND, [convert(atom) for atom in query.atoms])

    # Or
    if kind is _OrQuery:
        return Query(OP_OR, [convert(atom) for atom in query.atoms])

    # Not: everything but what the inner query matches
    if kind is NotQuery:
        return Query(OP_AND_NOT, Query(''), convert(query.query))

    # Unsupported type of query
    return None
def operator(self, query, op):
    """Return a copy of self whose query is `self.query <op> query`.

    `query` is first normalised with `self._db.querify`, using the
    language held in `self._language`.
    """
    other = self._db.querify(query, language=self._language)
    combined = Query(op, self.query, other)
    return self.copy(query=combined)
def and_elite(self, queries):
    """Combine self, through `and_`, with the OP_ELITE_SET query built
    from the `queries` sequence.
    """
    elite_set = Query(Query.OP_ELITE_SET, queries)
    return self.and_(elite_set)
def search(self, query=None, **kw):
    """Refine the current results; return a new instance of the same class.

    The new criteria (`query` or keyword parameters, see `_get_xquery`)
    are resolved against the catalog of the database and AND-ed with the
    query already held in `self._xquery`.
    """
    database = self._database
    refinement = _get_xquery(database.catalog, query, **kw)
    combined = Query(Query.OP_AND, [self._xquery, refinement])
    results_cls = self.__class__
    return results_cls(database, combined)
def near(db, terms, window=10, **kwargs):
    """Return a Search on `db` for the OP_NEAR query made of `terms`.

    `window` is passed to the Query constructor; the extra keyword
    arguments are passed to Search.
    """
    near_query = Query(Query.OP_NEAR, terms, window)
    return Search(db, near_query, **kwargs)
def or_elite(self, *queries):
    """Combine self, through `or_`, with the OP_ELITE_SET query built
    from the positional arguments (handed over as a tuple).
    """
    elite_set = Query(Query.OP_ELITE_SET, queries)
    return self.or_(elite_set)