def __init__(self, catalog, xquery):
    """Bind a query to a catalog and cache how many documents it matches.

    'catalog' - Object whose `_db` attribute is handed to Enquire.
    'xquery' - The query set on the Enquire object.

    Stores both arguments, keeps the configured Enquire object in
    `self._enquire`, and stores in `self._max` the size of the match set
    returned when as many results as the upper bound are requested.
    """
    self._catalog = catalog
    self._xquery = xquery

    # Enquire
    enq = Enquire(catalog._db)
    enq.set_query(xquery)
    self._enquire = enq

    # Max: probe with an empty window to get the upper bound, then fetch
    # that many matches and count what actually came back.
    upper_bound = enq.get_mset(0, 0).get_matches_upper_bound()
    full_mset = enq.get_mset(0, upper_bound)
    self._max = full_mset.size()
def morelike(self, model_clz, query, select=["*"], limit=10, offset=0,
             order=None, ascending=True):
    """Find documents in the database most relevant to the given terms.

    Terms are obtained from `self.suggest()` for 'query'; documents are
    then matched with an elite-set query built from those terms and passed
    through `self._add_model_query()`.

    'model_clz' - Forwarded to `suggest()` and `_add_model_query()`.
    'query' - The query forwarded to `suggest()`.
    'select' - The set of keys from the documents picked data dictionary
               to return or use to construct a record. '*' means all
               available keys.
    'limit' - The number of records to return.
    'offset' - How many records to skip before returning.
    'order' - The number or name (looked up in `self.sort_names`) of the
              column to sort the results by. None sorts by relevance.
    'ascending' - Whether to sort the results in ascending or descending
                  order.

    Returns the result of `self._generate_records(mset, select)`.

    Raises TypeError when 'order' is a name for which
    `self.sort_names.index()` reports -1.
    """
    # NOTE(review): the default for 'select' is a single list shared by all
    # calls; this is only safe if _generate_records() never mutates it.
    self.database.reopen()

    # suggest() is a generator; it is consumed here and only the first
    # element of each yielded pair (the term) is kept.
    terms = [suggestion[0] for suggestion in
             self.suggest(model_clz, query, limit, offset, order, ascending)]

    enq = Enquire(self.database)
    enq.set_collapse_key(DOC_ITEM_ID)

    if order is None:
        enq.set_sort_by_relevance()
    else:
        if isinstance(order, basestring):
            # Keep the requested name: the original code overwrote 'order'
            # with the index first, so the error message showed -1.
            sort_name = order
            order = self.sort_names.index(sort_name)
            # NOTE(review): if sort_names is a plain list, index() raises
            # ValueError itself and this check is never reached - confirm.
            if order == -1:
                raise TypeError("There is no sort name %s" % sort_name)
        enq.set_sort_by_value(order, ascending)

    elite_query = Query(Query.OP_ELITE_SET, terms, limit)
    elite_query = self._add_model_query(elite_query, model_clz)
    enq.set_query(elite_query)

    mset = enq.get_mset(offset, limit)
    return self._generate_records(mset, select)
def select(self, model_clz, query, select=set(["*"]), limit=10, offset=0,
           order=None, ascending=True, partial=False):
    """Select documents from the database matching 'query'.

    'model_clz' - Forwarded to `self._add_model_query()`.
    'query' - The query handed to `self._parse_query()`.
    'select' - The set of keys from the documents picked data dictionary
               to return or use to construct a record. '*' means all
               available keys.
    'limit' - The number of records to return.
    'offset' - How many records to skip before returning.
    'order' - The number or name (looked up in `self.sort_names`) of the
              column to sort the results by. None sorts by relevance.
    'ascending' - Whether to sort the results in ascending or descending
                  order.
    'partial' - Whether to support wildcard partial queries like "foo*".

    Returns the result of `self._generate_records(mset, select)`.

    Raises TypeError when 'order' is a name for which
    `self.sort_names.index()` reports -1.
    """
    # NOTE(review): the default for 'select' is a single set shared by all
    # calls; this is only safe if _generate_records() never mutates it.
    self.database.reopen()

    enq = Enquire(self.database)
    enq.set_collapse_key(DOC_ITEM_ID)

    if order is None:
        enq.set_sort_by_relevance()
    else:
        if isinstance(order, basestring):
            # Keep the requested name: the original code overwrote 'order'
            # with the index first, so the error message showed -1.
            sort_name = order
            order = self.sort_names.index(sort_name)
            # NOTE(review): if sort_names is a plain list, index() raises
            # ValueError itself and this check is never reached - confirm.
            if order == -1:
                raise TypeError("There is no sort name %s" % sort_name)
        enq.set_sort_by_value(order, ascending)

    # _parse_query() returns a (query, parser) pair; the parser is not
    # needed here.
    query, _parser = self._parse_query(query, partial=partial)
    query = self._add_model_query(query, model_clz)
    log.debug(query)
    enq.set_query(query)

    mset = enq.get_mset(offset, limit)
    return self._generate_records(mset, select)
def __init__(self, catalog, xquery):
    """Remember the catalog and query, and pre-compute the match count.

    'catalog' - Object whose `_db` attribute is handed to Enquire.
    'xquery' - The query set on the Enquire object.

    The configured Enquire object is kept in `self._enquire`; `self._max`
    holds the size of the match set fetched with the upper bound as the
    requested number of results.
    """
    self._catalog = catalog
    self._xquery = xquery

    # Enquire
    searcher = Enquire(catalog._db)
    searcher.set_query(xquery)
    self._enquire = searcher

    # Max: an empty window yields the upper bound, which is then used as
    # the window size for the match set whose size is stored.
    bound = searcher.get_mset(0, 0).get_matches_upper_bound()
    self._max = searcher.get_mset(0, bound).size()
def estimate(self, model_clz, query, limit=10, partial=False):
    """Estimate the number of documents the given query would yield.

    'model_clz' - Forwarded to `self._add_model_query()`.
    'query' - The query handed to `self._parse_query()`.
    'limit' - Tells the estimator the minimum number of documents to
              consider; passed as the third argument of `get_mset()`.
    'partial' - Forwarded to `self._parse_query()`.

    Returns `get_matches_estimated()` of a match set requested with an
    empty result window.
    """
    self.database.reopen()

    estimator = Enquire(self.database)
    estimator.set_collapse_key(DOC_ITEM_ID)

    # _parse_query() returns a (query, parser) pair; only the query is used.
    parsed_query, _parser = self._parse_query(query, partial=partial)
    model_query = self._add_model_query(parsed_query, model_clz)
    log.debug(model_query)
    estimator.set_query(model_query)

    # No documents are fetched (offset 0, size 0); only the estimate is read.
    empty_window = estimator.get_mset(0, 0, limit)
    return empty_window.get_matches_estimated()
def get_docs(db):
    """Return a list with the document of every match of an empty query.

    'db' - The database handed to Enquire.

    The upper bound on the number of matches is read first from an empty
    result window and then used as the window size for the real fetch.
    """
    searcher = Enquire(db)
    searcher.set_query(Query(''))

    upper_bound = searcher.get_mset(0, 0).get_matches_upper_bound()

    documents = []
    for match in searcher.get_mset(0, upper_bound):
        documents.append(match.get_document())
    return documents
def find(self, query):
    """Run a query against the index and return the matching UIDs.

    'query' - A dict. The 'offset', 'limit', 'order_by' and 'query' keys
              are popped from it (so the caller's dict is modified); what
              remains is given to the query parser together with the
              popped query string.

    Returns a tuple `(uids, total_count)`: the `_VALUE_UID` value of each
    hit in the requested window, and the match set's estimated number of
    matches.
    """
    offset = query.pop('offset', 0)
    limit = query.pop('limit', MAX_QUERY_LIMIT)
    order_by = query.pop('order_by', [])
    query_string = query.pop('query', None)

    parser = QueryParser()
    parser.set_database(self._database)
    enquire = Enquire(self._database)
    enquire.set_query(parser.parse_query(query, query_string))

    # Original author's note: this will assure that the results count is
    # exact.
    check_at_least = offset + limit + 1

    # Only the first entry of 'order_by' is used; with nothing requested
    # the results are sorted by ascending timestamp.
    order_by = order_by[0] if order_by else '+timestamp'

    # (order_by spelling, value slot, second argument of set_sort_by_value)
    sort_specs = (
        ('+timestamp', _VALUE_TIMESTAMP, True),
        ('-timestamp', _VALUE_TIMESTAMP, False),
        ('+title', _VALUE_TITLE, True),
        ('-title', _VALUE_TITLE, False),
        ('+filesize', _VALUE_FILESIZE, True),
        ('-filesize', _VALUE_FILESIZE, False),
        ('+creation_time', _VALUE_CREATION_TIME, True),
        ('-creation_time', _VALUE_CREATION_TIME, False),
    )
    for spelling, value_slot, flag in sort_specs:
        if order_by == spelling:
            enquire.set_sort_by_value(value_slot, flag)
            break
    else:
        # No spelling matched: leave the sort order untouched and warn.
        logging.warning('Unsupported property for sorting: %s', order_by)

    query_result = enquire.get_mset(offset, limit, check_at_least)
    total_count = query_result.get_matches_estimated()

    uids = [hit.document.get_value(_VALUE_UID) for hit in query_result]
    return (uids, total_count)
def _enquire(self):
    """Return a new Enquire over `self._catalog._db` with `self._xquery` set."""
    searcher = Enquire(self._catalog._db)
    searcher.set_query(self._xquery)
    return searcher
def _enquire(self):
    """Return a new Enquire with `self._xquery` set.

    The Enquire object is built over `self._database.catalog._db`.
    """
    searcher = Enquire(self._database.catalog._db)
    searcher.set_query(self._xquery)
    return searcher
def get_docs(db):
    """Return a list with the `document` of every match of an empty query.

    'db' - The database handed to Enquire.

    An empty result window is requested first to learn the upper bound on
    the number of matches; that bound is then used as the window size.
    """
    searcher = Enquire(db)
    searcher.set_query(Query(''))

    docs_max = searcher.get_mset(0, 0).get_matches_upper_bound()

    documents = []
    for match in searcher.get_mset(0, docs_max):
        documents.append(match.document)
    return documents
def suggest(self, model_clz, query, limit=10, offset=0, order=None,
            ascending=True):
    """Suggest terms that would possibly yield more relevant results for
    the given query.

    This is a generator: nothing below runs until it is iterated.

    'model_clz' - Forwarded to `self._add_model_query()`.
    'query' - The query string parsed with `self._query_parser()`.
    'limit' - The size of the match set used as relevance set, and the
              first argument of `get_eset()`.
    'offset' - How many matches to skip when building the relevance set.
    'order' - The number or name (looked up in `self.sort_names`) of the
              column to sort the matches by. None sorts by relevance.
    'ascending' - Whether to sort the matches in ascending or descending
                  order.

    Yields 2-tuples built from each expansion-set item: its first element
    decoded from UTF-8, and its second element unchanged (presumably the
    term weight - confirm against the Xapian bindings).

    Raises TypeError when 'order' is a name for which
    `self.sort_names.index()` reports -1.
    """
    self.database.reopen()

    enq = Enquire(self.database)
    enq.set_collapse_key(DOC_ITEM_ID)

    if order is None:
        enq.set_sort_by_relevance()
    else:
        if isinstance(order, basestring):
            # Keep the requested name: the original code overwrote 'order'
            # with the index first, so the error message showed -1.
            sort_name = order
            order = self.sort_names.index(sort_name)
            # NOTE(review): if sort_names is a plain list, index() raises
            # ValueError itself and this check is never reached - confirm.
            if order == -1:
                raise TypeError("There is no sort name %s" % sort_name)
        enq.set_sort_by_value(order, ascending)

    query_parser = self._query_parser()
    query = query_parser.parse_query(query)
    query = self._add_model_query(query, model_clz)
    log.debug(query)
    enq.set_query(query)

    # Every match in the requested window is marked as relevant, and the
    # expansion set is computed from that relevance set.
    mset = enq.get_mset(offset, limit)
    rset = RSet()
    for match in mset:
        rset.add_document(match[MSET_DID])

    eset = enq.get_eset(limit, rset)
    for item in eset.items:
        yield (item[0].decode('utf8'), item[1])