Пример #1
0
    def __init__(self, catalog, xquery):
        """Store the catalog and query, build an Enquire and count the matches.

        'catalog' - Object whose '_db' attribute is handed to Enquire.

        'xquery' - The query object set on the Enquire.
        """
        self._catalog, self._xquery = catalog, xquery

        # Enquire bound to the catalog's database and primed with the query.
        searcher = Enquire(catalog._db)
        searcher.set_query(xquery)
        self._enquire = searcher

        # Probe with an empty window to learn the upper bound on matches, then
        # fetch that many and record how many were actually returned.
        upper_bound = searcher.get_mset(0, 0).get_matches_upper_bound()
        full_mset = searcher.get_mset(0, upper_bound)
        self._max = full_mset.size()
Пример #2
0
    def morelike(self, model_clz, query, select=["*"], limit=10, offset=0,
                 order=None, ascending=True):
        """Find documents most relevant to the terms suggested for 'query'.

        The query is first expanded with self.suggest(); the suggested terms
        are combined into an OP_ELITE_SET query, which is passed through
        self._add_model_query() together with 'model_clz' before being run.

        'model_clz' - Forwarded to suggest() and _add_model_query().

        'query' - The query forwarded to suggest().

        'select' - The set of keys from the documents picked data
        dictionary to return or use to construct a record.  '*' means
        all available keys.

        'limit' - The number of records to return.  It is also forwarded
        to suggest() and used as the OP_ELITE_SET parameter.

        'offset' - How many records to skip before returning.

        'order' - The number or name (looked up in self.sort_names) of the
        column to sort the results by.  None sorts by relevance.

        'ascending' - Whether to sort the results in ascending or
        descending order.

        Returns whatever self._generate_records() produces for the match set.

        Raises TypeError if 'order' is a string not found in self.sort_names.
        """
        self.database.reopen()
        suggested_terms = self.suggest(model_clz, query, limit, offset, order,
                                       ascending)
        terms = [term[0] for term in suggested_terms]

        enq = Enquire(self.database)
        enq.set_collapse_key(DOC_ITEM_ID)
        if order is None:
            enq.set_sort_by_relevance()
        else:
            if isinstance(order, basestring):
                # index() signals a missing name by raising ValueError (it
                # never returns -1); 'order' still holds the name in the
                # handler because the assignment did not happen.
                try:
                    order = self.sort_names.index(order)
                except ValueError:
                    raise TypeError("There is no sort name %s" % order)
            enq.set_sort_by_value(order, ascending)

        query = Query(Query.OP_ELITE_SET, terms, limit)
        query = self._add_model_query(query, model_clz)
        enq.set_query(query)
        mset = enq.get_mset(offset, limit)
        return self._generate_records(mset, select)
Пример #3
0
    def select(self, model_clz, query, select=set(["*"]), limit=10, offset=0,
               order=None, ascending=True, partial=False):
        """Select documents from the database matching 'query'.

        'model_clz' - Forwarded to self._add_model_query() along with the
        parsed query.

        'query' - The query handed to self._parse_query().

        'select' - The set of keys from the documents picked data
        dictionary to return or use to construct a record.  '*' means
        all available keys.

        'limit' - The number of records to return.

        'offset' - How many records to skip before returning.

        'order' - The number or name (looked up in self.sort_names) of the
        column to sort the results by.  None sorts by relevance.

        'ascending' - Whether to sort the results in ascending or
        descending order.

        'partial' - Whether to support wildcard partial queries like "foo*".

        Returns whatever self._generate_records() produces for the match set.

        Raises TypeError if 'order' is a string not found in self.sort_names.
        """
        self.database.reopen()
        enq = Enquire(self.database)
        enq.set_collapse_key(DOC_ITEM_ID)
        if order is None:
            enq.set_sort_by_relevance()
        else:
            if isinstance(order, basestring):
                # index() signals a missing name by raising ValueError (it
                # never returns -1); 'order' still holds the name in the
                # handler because the assignment did not happen.
                try:
                    order = self.sort_names.index(order)
                except ValueError:
                    raise TypeError("There is no sort name %s" % order)
            enq.set_sort_by_value(order, ascending)

        # The parser returned alongside the query is not needed here.
        query, query_parser = self._parse_query(query, partial=partial)
        query = self._add_model_query(query, model_clz)
        log.debug(query)
        enq.set_query(query)
        mset = enq.get_mset(offset, limit)
        return self._generate_records(mset, select)
Пример #4
0
    def __init__(self, catalog, xquery):
        """Remember the catalog and query, set up an Enquire, count matches.

        'catalog' - Object whose '_db' attribute is handed to Enquire.

        'xquery' - The query object set on the Enquire.
        """
        self._catalog = catalog
        self._xquery = xquery

        # Enquire bound to the catalog's database and primed with the query.
        self._enquire = searcher = Enquire(catalog._db)
        searcher.set_query(xquery)

        # An empty-window match set yields the upper bound on matches; a
        # second request of that size gives the number actually returned.
        match_ceiling = searcher.get_mset(0, 0).get_matches_upper_bound()
        self._max = searcher.get_mset(0, match_ceiling).size()
Пример #5
0
 def estimate(self, model_clz, query, limit=10, partial=False):
     """
     Return the estimated number of matches for the given query.

     'model_clz' - Forwarded to self._add_model_query().

     'limit' - Passed as the third argument of get_mset(), i.e. the
     minimum number of documents the estimator should consider.

     'partial' - Forwarded to self._parse_query().
     """
     self.database.reopen()
     searcher = Enquire(self.database)
     searcher.set_collapse_key(DOC_ITEM_ID)

     # Only the parsed query is used; the parser that comes with it is not.
     parsed, _parser = self._parse_query(query, partial=partial)
     model_query = self._add_model_query(parsed, model_clz)
     log.debug(model_query)
     searcher.set_query(model_query)

     # Zero-size window: no documents are fetched, only the estimate is read.
     probe = searcher.get_mset(0, 0, limit)
     return probe.get_matches_estimated()
Пример #6
0
def get_docs(db):
    """Return the documents of all matches for an empty-string Query on db."""
    searcher = Enquire(db)
    # NOTE(review): Query('') is presumably Xapian's match-all query -- confirm.
    searcher.set_query(Query(''))

    # Empty-window probe: only the upper bound on the match count is needed.
    upper_bound = searcher.get_mset(0, 0).get_matches_upper_bound()

    documents = []
    for match in searcher.get_mset(0, upper_bound):
        documents.append(match.get_document())
    return documents
Пример #7
0
    def find(self, query):
        """Run the search described by the 'query' dict.

        The keys 'offset', 'limit', 'order_by' and 'query' are popped from
        'query' (so the caller's dict is modified); what remains is handed to
        the query parser together with the popped query string.

        Returns a tuple (uids, total_count): the _VALUE_UID value of each hit
        in the requested window and the estimated number of matches.
        """
        offset = query.pop('offset', 0)
        limit = query.pop('limit', MAX_QUERY_LIMIT)
        order_by = query.pop('order_by', [])
        query_string = query.pop('query', None)

        parser = QueryParser()
        parser.set_database(self._database)
        enquire = Enquire(self._database)
        enquire.set_query(parser.parse_query(query, query_string))

        # Have at least one match beyond the requested window checked; the
        # original author's intent is that this makes the count exact.
        check_at_least = offset + limit + 1

        # Only the first requested ordering is honoured; default is timestamp.
        order_by = order_by[0] if order_by else '+timestamp'

        # (order_by key, value slot, ascending flag)
        sort_specs = (
            ('+timestamp', _VALUE_TIMESTAMP, True),
            ('-timestamp', _VALUE_TIMESTAMP, False),
            ('+title', _VALUE_TITLE, True),
            ('-title', _VALUE_TITLE, False),
            ('+filesize', _VALUE_FILESIZE, True),
            ('-filesize', _VALUE_FILESIZE, False),
            ('+creation_time', _VALUE_CREATION_TIME, True),
            ('-creation_time', _VALUE_CREATION_TIME, False),
        )
        for key, slot, ascending in sort_specs:
            if order_by == key:
                enquire.set_sort_by_value(slot, ascending)
                break
        else:
            # No known key matched: keep the default ordering and warn.
            logging.warning('Unsupported property for sorting: %s', order_by)

        mset = enquire.get_mset(offset, limit, check_at_least)
        total_count = mset.get_matches_estimated()

        uids = [hit.document.get_value(_VALUE_UID) for hit in mset]
        return (uids, total_count)
Пример #8
0
    def find(self, query):
        """Execute a search and return the matching uids with a total count.

        'query' is a dict.  Its 'offset', 'limit', 'order_by' and 'query'
        entries are popped (mutating the caller's dict) and the remainder is
        passed to the query parser along with the popped query string.

        Returns (uids, total_count), where uids holds the _VALUE_UID value of
        every hit in the window and total_count is the estimated match count.
        """
        offset = query.pop('offset', 0)
        limit = query.pop('limit', MAX_QUERY_LIMIT)
        order_by = query.pop('order_by', [])
        query_string = query.pop('query', None)

        parser = QueryParser()
        parser.set_database(self._database)
        enquire = Enquire(self._database)
        parsed_query = parser.parse_query(query, query_string)
        enquire.set_query(parsed_query)

        # Have at least one match beyond the requested window checked; the
        # original author's intent is that this makes the count exact.
        check_at_least = offset + limit + 1

        # Only the first requested ordering is used; fall back to timestamp.
        if order_by:
            order_by = order_by[0]
        else:
            order_by = '+timestamp'

        # Each row: (order_by key, value slot, ascending flag).
        known_orderings = [
            ('+timestamp', _VALUE_TIMESTAMP, True),
            ('-timestamp', _VALUE_TIMESTAMP, False),
            ('+title', _VALUE_TITLE, True),
            ('-title', _VALUE_TITLE, False),
            ('+filesize', _VALUE_FILESIZE, True),
            ('-filesize', _VALUE_FILESIZE, False),
            ('+creation_time', _VALUE_CREATION_TIME, True),
            ('-creation_time', _VALUE_CREATION_TIME, False),
        ]
        for name, value_slot, is_ascending in known_orderings:
            if order_by == name:
                enquire.set_sort_by_value(value_slot, is_ascending)
                break
        else:
            # Unknown ordering: leave the Enquire's sorting untouched.
            logging.warning('Unsupported property for sorting: %s', order_by)

        hits = enquire.get_mset(offset, limit, check_at_least)
        total_count = hits.get_matches_estimated()

        uids = [hit.document.get_value(_VALUE_UID) for hit in hits]
        return (uids, total_count)
Пример #9
0
 def _enquire(self):
     """Return a new Enquire on the catalog's '_db', set to the stored query."""
     searcher = Enquire(self._catalog._db)
     searcher.set_query(self._xquery)
     return searcher
Пример #10
0
 def _enquire(self):
     """Return a new Enquire on the database catalog's '_db', set to the stored query."""
     backend = self._database.catalog._db
     searcher = Enquire(backend)
     searcher.set_query(self._xquery)
     return searcher
Пример #11
0
def get_docs(db):
    """Return the 'document' of every match for an empty-string Query on db."""
    searcher = Enquire(db)
    # NOTE(review): Query('') is presumably Xapian's match-all query -- confirm.
    searcher.set_query(Query(''))

    # Empty-window probe: only the upper bound on the match count is needed.
    upper_bound = searcher.get_mset(0, 0).get_matches_upper_bound()

    documents = []
    for match in searcher.get_mset(0, upper_bound):
        documents.append(match.document)
    return documents
Пример #12
0
    def suggest(self, model_clz, query, limit=10, offset=0, order=None, ascending=True):
        """
        Suggest terms that would possibly yield more relevant results
        for the given query.

        This is a generator: nothing runs until it is iterated.  The matches
        for 'query' (window 'offset'/'limit') are collected into an RSet and
        used to request an expansion set of up to 'limit' items.

        'model_clz' - Forwarded to self._add_model_query().

        'order' - The number or name (looked up in self.sort_names) of the
        column to sort the matches by.  None sorts by relevance.

        'ascending' - Whether to sort in ascending or descending order.

        Yields 2-tuples: the first element of each expansion item decoded
        from UTF-8, and its second element unchanged (presumably the term's
        weight -- confirm against the Xapian ESet documentation).

        Raises TypeError if 'order' is a string not found in self.sort_names.
        """
        self.database.reopen()
        enq = Enquire(self.database)
        enq.set_collapse_key(DOC_ITEM_ID)
        if order is None:
            enq.set_sort_by_relevance()
        else:
            if isinstance(order, basestring):
                # index() signals a missing name by raising ValueError (it
                # never returns -1); 'order' still holds the name in the
                # handler because the assignment did not happen.
                try:
                    order = self.sort_names.index(order)
                except ValueError:
                    raise TypeError("There is no sort name %s" % order)
            enq.set_sort_by_value(order, ascending)

        query_parser = self._query_parser()
        query = query_parser.parse_query(query)
        query = self._add_model_query(query, model_clz)
        log.debug(query)
        enq.set_query(query)
        mset = enq.get_mset(offset, limit)

        # Treat every match in the window as relevant for term expansion.
        rset = RSet()
        for m in mset:
            rset.add_document(m[MSET_DID])

        eset = enq.get_eset(limit, rset)

        for item in eset.items:
            yield (item[0].decode('utf8'), item[1])