示例#1
0
class Catalog(object):

    def __init__(self, ref, fields, read_only=False, asynchronous_mode=True):
        # Load the database
        if isinstance(ref, (Database, WritableDatabase)):
            self._db = ref
        else:
            path = lfs.get_absolute_path(ref)
            if read_only:
                self._db = Database(path)
            else:
                self._db = WritableDatabase(path, DB_OPEN)

        db = self._db
        self._asynchronous = asynchronous_mode
        self._fields = fields

        # Asynchronous mode
        if not read_only and asynchronous_mode:
            db.begin_transaction(False)

        # Load the xfields from the database
        self._metadata = {}
        self._value_nb = 0
        self._prefix_nb = 0
        self._load_all_internal()


    #######################################################################
    # API / Public / Transactions
    #######################################################################
    def save_changes(self):
        """Save the last changes to disk.
        """
        if not self._asynchronous:
            raise ValueError, "The transactions are synchronous"
        db = self._db
        db.commit_transaction()
        db.flush()
        db.begin_transaction(False)


    def abort_changes(self):
        """Abort the last changes made in memory.
        """
        if not self._asynchronous:
            raise ValueError, "The transactions are synchronous"
        db = self._db
        db.cancel_transaction()
        self._load_all_internal()
        db.begin_transaction(False)


    #######################################################################
    # API / Public / (Un)Index
    #######################################################################
    def index_document(self, document):
        """Add a new document.
        """
        db = self._db
        metadata = self._metadata
        fields = self._fields

        # Check the input
        if type(document) is dict:
            doc_values = document
        else:
            doc_values = document.get_catalog_values()

        # Make the xapian document
        metadata_modified = False
        xdoc = Document()
        for name, value in doc_values.iteritems():
            if name not in fields:
                warn_not_indexed_nor_stored(name)
            field_cls = fields[name]

            # New field ?
            if name not in metadata:
                info = metadata[name] = self._get_info(field_cls, name)
                metadata_modified = True
            else:
                info = metadata[name]

            # XXX This comment is no longer valid, now the key field is
            #     always abspath with field_cls = String
            # Store the key field with the prefix 'Q'
            # Comment: the key field is indexed twice, but we must do it
            #          one => to index (as the others)
            #          two => to index without split
            #          the problem is that "_encode != _index"
            if name == 'abspath':
                key_value = _reduce_size(_encode(field_cls, value))
                xdoc.add_term('Q' + key_value)

            # A multilingual value?
            if isinstance(value, dict):
                for language, lang_value in value.iteritems():
                    lang_name = name + '_' + language

                    # New field ?
                    if lang_name not in metadata:
                        lang_info = self._get_info(field_cls, lang_name)
                        lang_info['from'] = name
                        metadata[lang_name] = lang_info
                        metadata_modified = True
                    else:
                        lang_info = metadata[lang_name]

                    # The value can be None
                    if lang_value is not None:
                        # Is stored ?
                        if 'value' in lang_info:
                            xdoc.add_value(lang_info['value'],
                                           _encode(field_cls, lang_value))
                        # Is indexed ?
                        if 'prefix' in lang_info:
                            # Comment: Index twice
                            _index(xdoc, field_cls, lang_value,
                                   info['prefix'], language)
                            _index(xdoc, field_cls, lang_value,
                                   lang_info['prefix'], language)
            # The value can be None
            elif value is not None:
                # Is stored ?
                if 'value' in info:
                    xdoc.add_value(info['value'], _encode(field_cls, value))
                # Is indexed ?
                if 'prefix' in info:
                    # By default language='en'
                    _index(xdoc, field_cls, value, info['prefix'], 'en')

        # TODO: Don't store two documents with the same key field!

        # Save the doc
        db.add_document(xdoc)

        # Store metadata ?
        if metadata_modified:
            db.set_metadata('metadata', dumps(metadata))


    def unindex_document(self, abspath):
        """Remove the document that has value stored in its abspath.
           If the document does not exist => no error
        """
        data = _reduce_size(_encode(self._fields['abspath'], abspath))
        self._db.delete_document('Q' + data)


    #######################################################################
    # API / Public / Search
    #######################################################################
    def get_unique_values(self, name):
        """Return all the terms of a given indexed field
        """
        metadata = self._metadata
        # If there is a problem => an empty result
        if name not in metadata:
            warn_not_indexed(name)
            return set()

        # Ok
        prefix = metadata[name]['prefix']
        prefix_len = len(prefix)
        return set([ t.term[prefix_len:] for t in self._db.allterms(prefix) ])


    #######################################################################
    # API / Private
    #######################################################################
    def _get_info(self, field_cls, name):
        # The key field ?
        if name == 'abspath':
            if not (issubclass(field_cls, String) and
                    field_cls.stored and
                    field_cls.indexed):
                raise ValueError, ('the abspath field must be declared as '
                                   'String(stored=True, indexed=True)')
        # Stored ?
        info = {}
        if getattr(field_cls, 'stored', False):
            info['value'] = self._value_nb
            self._value_nb += 1
        # Indexed ?
        if getattr(field_cls, 'indexed', False):
            info['prefix'] = _get_prefix(self._prefix_nb)
            self._prefix_nb += 1

        return info


    def _load_all_internal(self):
        """Load the metadata from the database
        """
        self._value_nb = 0
        self._prefix_nb = 0

        metadata = self._db.get_metadata('metadata')
        if metadata == '':
            self._metadata = {}
        else:
            self._metadata = loads(metadata)
            for name, info in self._metadata.iteritems():
                if 'value' in info:
                    self._value_nb += 1
                if 'prefix' in info:
                    self._prefix_nb += 1


    def _query2xquery(self, query):
        """take a "itools" query and return a "xapian" query
        """
        query_class = type(query)
        fields = self._fields
        metadata = self._metadata

        # All Query
        if query_class is AllQuery:
            return Query('')

        # PhraseQuery, the field must be indexed
        if query_class is PhraseQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected '%s'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()
            info = metadata[name]
            try:
                prefix = info['prefix']
            except KeyError:
                raise ValueError, 'the field "%s" must be indexed' % name
            field_cls = _get_field_cls(name, fields, info)
            return _make_PhraseQuery(field_cls, query.value, prefix)

        # RangeQuery, the field must be stored
        if query_class is RangeQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected '%s'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()

            info = metadata[name]
            value = info.get('value')
            if value is None:
                raise AttributeError, MSG_NOT_STORED.format(name=name)
            field_cls = _get_field_cls(name, fields, info)
            if field_cls.multiple:
                error = 'range-query not supported on multiple fields'
                raise ValueError, error

            left = query.left
            if left is not None:
                left = _encode_simple_value(field_cls, left)

            right = query.right
            if right is not None:
                right = _encode_simple_value(field_cls, right)

            # Case 1: no limits, return everything
            if left is None and right is None:
                return Query('')

            # Case 2: left limit only
            if right is None:
                return Query(OP_VALUE_GE, value, left)

            # Case 3: right limit only
            if left is None:
                return Query(OP_VALUE_LE, value, right)

            # Case 4: left and right
            return Query(OP_VALUE_RANGE, value, left, right)

        # StartQuery, the field must be stored
        if query_class is StartQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected '%s'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()

            info = metadata[name]
            value_nb = info.get('value')
            if value_nb is None:
                raise AttributeError, MSG_NOT_STORED.format(name=name)
            field_cls = _get_field_cls(name, fields, info)

            value = query.value
            value = _encode(field_cls, value)

            if value:
                # good = {x / x >= value}
                good = Query(OP_VALUE_GE, value_nb, value)

                # Construct the variable end_value:
                # end_value = the word "after" value: toto => totp

                # Delete the '\xff' at the end of value
                end_value = value
                while end_value and ord(end_value[-1]) == 255:
                    end_value = end_value[:-1]

                # Normal case: end_value is not empty
                if end_value:
                    # The world after
                    end_value = end_value[:-1] + chr(ord(end_value[-1]) + 1)

                    # bad = {x / x >= end_value}
                    bad = Query(OP_VALUE_GE, value_nb, end_value)

                    # Return {x / x in good but x not in bad}
                    return Query(OP_AND_NOT, good, bad)
                # If end_value is empty
                else:
                    # Return {x / x in good}
                    return good
            else:
                # If value == '', we return everything
                return Query('')

        # TextQuery, the field must be indexed
        if query_class is TextQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected %s for 'name'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()

            info = metadata[name]
            field_cls = _get_field_cls(name, fields, info)
            try:
                prefix = info['prefix']
            except KeyError:
                raise ValueError, 'the field "%s" must be indexed' % name

            # Remove accents from the value
            value = query.value
            if type(value) is not unicode:
                raise TypeError, "unexpected %s for 'value'" % type(value)
            value = value.translate(TRANSLATE_MAP)

            qp = QueryParser()
            qp.set_database(self._db)
            return qp.parse_query(_encode(field_cls, value), TQ_FLAGS, prefix)

        i2x = self._query2xquery
        # Multiple query with single atom
        if isinstance(query, _MultipleQuery) and len(query.atoms) == 1:
            return i2x(query.atoms[0])

        # And
        if query_class is _AndQuery:
            return Query(OP_AND, [ i2x(q) for q in query.atoms ])

        # Or
        if query_class is _OrQuery:
            return Query(OP_OR, [ i2x(q) for q in query.atoms ])

        # Not
        if query_class is NotQuery:
            return Query(OP_AND_NOT, Query(''), i2x(query.query))
示例#2
0
文件: catalog.py 项目: nkhine/itools
class Catalog(object):

    nb_changes = 0
    logger = None
    _db = None
    read_only = False

    def __init__(self, ref, fields, read_only=False, asynchronous_mode=True):
        self.read_only = read_only
        # Load the database
        if isinstance(ref, (Database, WritableDatabase)):
            path = None
            self._db = ref
        else:
            path = lfs.get_absolute_path(ref)
            if read_only:
                self._db = Database(path)
            else:
                self._db = WritableDatabase(path, DB_OPEN)

        db = self._db
        self._asynchronous = asynchronous_mode
        self._fields = fields
        # FIXME: There's a bug in xapian:
        # Wa cannot get stored values if DB not flushed
        self.commit_each_transaction = True
        # Asynchronous mode
        if not read_only and asynchronous_mode:
            db.begin_transaction(self.commit_each_transaction)
        # Set XAPIAN_FLUSH_THRESHOLD
        os.environ["XAPIAN_FLUSH_THRESHOLD"] = "2000"
        # Load the xfields from the database
        self._metadata = {}
        self._value_nb = 0
        self._prefix_nb = 0
        self._load_all_internal()
        if not read_only:
            self._init_all_metadata()
        # Catalog log
        if path:
            catalog_log = '{}/catalog.log'.format(path)
            self.logger = CatalogLogger(catalog_log)
            register_logger(self.logger, 'itools.catalog')



    def _init_all_metadata(self):
        """Init new metadata (to avoid 'field is not indexed' warning)
        """
        has_changes = False
        metadata = self._metadata
        for name, field_cls in self._fields.items():
            if name not in metadata:
                print('[Catalog] New field registered: {0}'.format(name))
                has_changes = True
                metadata[name] = self._get_info(field_cls, name)
            else:
                # If the field was in the catalog but is newly stored
                if (not metadata[name].has_key('value') and
                    getattr(field_cls, 'stored', False)):
                    print('[Catalog] Indexed field is now stored: {0}'.format(name))
                    has_changes = True
                    metadata[name] = merge_dicts(
                        metadata[name],
                        self._get_info_stored())
                # If the field was stored in the catalog but is newly indexed
                if (not metadata[name].has_key('prefix') and
                    getattr(field_cls, 'indexed', False)):
                    print('[Catalog] Stored field is now indexed: {0}'.format(name))
                    has_changes = True
                    metadata[name] = merge_dicts(
                        metadata[name],
                        self._get_info_indexed())
        if has_changes:
            self._db.set_metadata('metadata', dumps(metadata))
            self._db.commit_transaction()
            self._db.begin_transaction(self.commit_each_transaction)


    #######################################################################
    # API / Public / Transactions
    #######################################################################
    def save_changes(self):
        """Save the last changes to disk.
        """
        if not self._asynchronous:
            raise ValueError, "The transactions are synchronous"
        db = self._db
        db.commit_transaction()
        db.commit()
        # FIXME: There's a bug in xapian:
        # Wa cannot get stored values if DB not flushed
        #if self.nb_changes > 200:
        #    # XXX Not working since cancel_transaction()
        #    # cancel all transactions not commited to disk
        #    # We have to use new strategy to abort transaction
        #    db.commit()
        #    if self.logger:
        #        self.logger.clear()
        #    self.nb_changes = 0
        db.begin_transaction(self.commit_each_transaction)


    def abort_changes(self):
        """Abort the last changes made in memory.
        """
        if not self._asynchronous:
            raise ValueError, "The transactions are synchronous"
        db = self._db
        if self.commit_each_transaction:
            db.cancel_transaction()
            db.begin_transaction(self.commit_each_transaction)
        else:
            raise NotImplementedError
        self._load_all_internal()


    def close(self):
        if self._db is None:
            msg = 'Catalog is already closed'
            print(msg)
            return
        if self.read_only:
            self._db.close()
            self._db = None
            return
        if self.commit_each_transaction:
            try:
                self._db.cancel_transaction()
            except:
                print('Warning: cannot cancel xapian transaction')
                self._db.close()
                self._db = None
            else:
                self._db.close()
                self._db = None
        else:
            self.abort_changes()
            self._db.commit_transaction()
            self._db.flush()
            self._db.close()
            self._db = None
        if self.logger:
            self.logger.clear()


    #######################################################################
    # API / Public / (Un)Index
    #######################################################################
    def index_document(self, document):
        self.nb_changes += 1
        abspath, term, xdoc = self.get_xdoc_from_document(document)
        self._db.replace_document(term, xdoc)
        if self.logger:
            log_info(abspath, domain='itools.catalog')


    def unindex_document(self, abspath):
        """Remove the document that has value stored in its abspath.
           If the document does not exist => no error
        """
        self.nb_changes += 1
        data = _reduce_size(_encode(self._fields['abspath'], abspath))
        self._db.delete_document('Q' + data)
        if self.logger:
            log_info(abspath, domain='itools.catalog')


    def get_xdoc_from_document(self, doc_values):
        """Return (abspath, term, xdoc) from the document (resource or values as dict)
        """
        term = None
        metadata = self._metadata
        # Check the input
        if type(doc_values) is not dict:
            raise NotImplementedError('Deprecated: doc_values should be a dict')
        fields = self._fields
        abspath = doc_values['abspath']
        # Make the xapian document
        metadata_modified = False
        xdoc = Document()
        for name, value in doc_values.iteritems():
            if name not in fields:
                warn_not_indexed_nor_stored(name)
            field_cls = fields[name]

            # New field ?
            if name not in metadata:
                info = metadata[name] = self._get_info(field_cls, name)
                metadata_modified = True
            else:
                info = metadata[name]

            # XXX This comment is no longer valid, now the key field is
            #     always abspath with field_cls = String
            # Store the key field with the prefix 'Q'
            # Comment: the key field is indexed twice, but we must do it
            #          one => to index (as the others)
            #          two => to index without split
            #          the problem is that "_encode != _index"
            if name == 'abspath':
                key_value = _reduce_size(_encode(field_cls, value))
                term = 'Q' + key_value
                xdoc.add_term(term)

            # A multilingual value?
            if isinstance(value, dict):
                for language, lang_value in value.iteritems():
                    lang_name = name + '_' + language

                    # New field ?
                    if lang_name not in metadata:
                        lang_info = self._get_info(field_cls, lang_name)
                        lang_info['from'] = name
                        metadata[lang_name] = lang_info
                        metadata_modified = True
                    else:
                        lang_info = metadata[lang_name]

                    # The value can be None
                    if lang_value is not None:
                        # Is stored ?
                        if 'value' in lang_info:
                            xdoc.add_value(lang_info['value'],
                                           _encode(field_cls, lang_value))
                        # Is indexed ?
                        if 'prefix' in lang_info:
                            # Comment: Index twice
                            _index(xdoc, field_cls, lang_value,
                                   info['prefix'], language)
                            _index(xdoc, field_cls, lang_value,
                                   lang_info['prefix'], language)
            # The value can be None
            elif value is not None:
                # Is stored ?
                if 'value' in info:
                    xdoc.add_value(info['value'], _encode(field_cls, value))
                # Is indexed ?
                if 'prefix' in info:
                    # By default language='en'
                    _index(xdoc, field_cls, value, info['prefix'], 'en')
        # Store metadata ?
        if metadata_modified:
            metadata = self._metadata
            self._db.set_metadata('metadata', dumps(metadata))
        # Ok
        return abspath, term, xdoc

    #######################################################################
    # API / Public / Search
    #######################################################################
    def get_unique_values(self, name):
        """Return all the terms of a given indexed field
        """
        metadata = self._metadata
        # If there is a problem => an empty result
        if name not in metadata:
            warn_not_indexed(name)
            return set()

        # Ok
        prefix = metadata[name]['prefix']
        prefix_len = len(prefix)
        return set([ t.term[prefix_len:] for t in self._db.allterms(prefix) ])


    #######################################################################
    # API / Private
    #######################################################################
    def _get_info(self, field_cls, name):
        # The key field ?
        if name == 'abspath':
            if not (issubclass(field_cls, String) and
                    field_cls.stored and
                    field_cls.indexed):
                raise ValueError, ('the abspath field must be declared as '
                                   'String(stored=True, indexed=True)')
        # Stored ?
        info = {}
        if getattr(field_cls, 'stored', False):
            info = self._get_info_stored()
        # Indexed ?
        if getattr(field_cls, 'indexed', False):
            info = merge_dicts(info, self._get_info_indexed())
        # Ok
        return info


    def _get_info_stored(self):
        value = self._value_nb
        self._value_nb += 1
        return {'value': value}


    def _get_info_indexed(self):
        prefix = _get_prefix(self._prefix_nb)
        self._prefix_nb += 1
        return {'prefix': prefix}



    def _load_all_internal(self):
        """Load the metadata from the database
        """
        self._value_nb = 0
        self._prefix_nb = 0

        metadata = self._db.get_metadata('metadata')
        if metadata == '':
            self._metadata = {}
        else:
            self._metadata = loads(metadata)
            for name, info in self._metadata.iteritems():
                if 'value' in info:
                    self._value_nb += 1
                if 'prefix' in info:
                    self._prefix_nb += 1


    def _query2xquery(self, query):
        """take a "itools" query and return a "xapian" query
        """
        query_class = type(query)
        fields = self._fields
        metadata = self._metadata

        # All Query
        if query_class is AllQuery:
            return Query('')

        # PhraseQuery, the field must be indexed
        if query_class is PhraseQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected '%s'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()
            info = metadata[name]
            try:
                prefix = info['prefix']
            except KeyError:
                raise ValueError, 'the field "%s" must be indexed' % name
            field_cls = _get_field_cls(name, fields, info)
            return _make_PhraseQuery(field_cls, query.value, prefix)

        # RangeQuery, the field must be stored
        if query_class is RangeQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected '%s'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()

            info = metadata[name]
            value = info.get('value')
            if value is None:
                raise AttributeError, MSG_NOT_STORED.format(name=name)
            field_cls = _get_field_cls(name, fields, info)
            if field_cls.multiple:
                error = 'range-query not supported on multiple fields'
                raise ValueError, error

            left = query.left
            if left is not None:
                left = _encode_simple_value(field_cls, left)

            right = query.right
            if right is not None:
                right = _encode_simple_value(field_cls, right)

            # Case 1: no limits, return everything
            if left is None and right is None:
                return Query('')

            # Case 2: left limit only
            if right is None:
                return Query(OP_VALUE_GE, value, left)

            # Case 3: right limit only
            if left is None:
                return Query(OP_VALUE_LE, value, right)

            # Case 4: left and right
            return Query(OP_VALUE_RANGE, value, left, right)

        # StartQuery, the field must be stored
        if query_class is StartQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected '%s'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()

            info = metadata[name]
            value_nb = info.get('value')
            if value_nb is None:
                raise AttributeError, MSG_NOT_STORED.format(name=name)
            field_cls = _get_field_cls(name, fields, info)

            value = query.value
            value = _encode(field_cls, value)

            if value:
                # good = {x / x >= value}
                good = Query(OP_VALUE_GE, value_nb, value)

                # Construct the variable end_value:
                # end_value = the word "after" value: toto => totp

                # Delete the '\xff' at the end of value
                end_value = value
                while end_value and ord(end_value[-1]) == 255:
                    end_value = end_value[:-1]

                # Normal case: end_value is not empty
                if end_value:
                    # The world after
                    end_value = end_value[:-1] + chr(ord(end_value[-1]) + 1)

                    # bad = {x / x >= end_value}
                    bad = Query(OP_VALUE_GE, value_nb, end_value)

                    # Return {x / x in good but x not in bad}
                    return Query(OP_AND_NOT, good, bad)
                # If end_value is empty
                else:
                    # Return {x / x in good}
                    return good
            else:
                # If value == '', we return everything
                return Query('')

        # TextQuery, the field must be indexed
        if query_class is TextQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected %s for 'name'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()

            info = metadata[name]
            field_cls = _get_field_cls(name, fields, info)
            try:
                prefix = info['prefix']
            except KeyError:
                raise ValueError, 'the field "%s" must be indexed' % name

            # Remove accents from the value
            value = query.value
            if type(value) is not unicode:
                raise TypeError, "unexpected %s for 'value'" % type(value)
            value = value.translate(TRANSLATE_MAP)

            qp = QueryParser()
            qp.set_database(self._db)
            return qp.parse_query(_encode(field_cls, value), TQ_FLAGS, prefix)

        i2x = self._query2xquery
        # Multiple query with single atom
        if isinstance(query, _MultipleQuery) and len(query.atoms) == 1:
            return i2x(query.atoms[0])

        # And
        if query_class is _AndQuery:
            return Query(OP_AND, [ i2x(q) for q in query.atoms ])

        # Or
        if query_class is _OrQuery:
            return Query(OP_OR, [ i2x(q) for q in query.atoms ])

        # Not
        if query_class is NotQuery:
            return Query(OP_AND_NOT, Query(''), i2x(query.query))
示例#3
0
文件: catalog.py 项目: kennym/itools
class Catalog(object):

    def __init__(self, ref, fields, read_only=False, asynchronous_mode=True):
        # Load the database
        if isinstance(ref, Database) or isinstance(ref, WritableDatabase):
            self._db = ref
        else:
            path = lfs.get_absolute_path(ref)
            if read_only:
                self._db = Database(path)
            else:
                self._db = WritableDatabase(path, DB_OPEN)

        db = self._db
        self._asynchronous = asynchronous_mode
        self._fields = fields

        # Asynchronous mode
        if not read_only and asynchronous_mode:
            db.begin_transaction(False)

        # Load the xfields from the database
        self._metadata = {}
        self._key_field = None
        self._value_nb = 0
        self._prefix_nb = 0
        self._load_all_internal()


    #######################################################################
    # API / Public / Transactions
    #######################################################################
    def save_changes(self):
        """Save the last changes to disk.
        """
        if not self._asynchronous:
            raise ValueError, "The transactions are synchronous"
        db = self._db
        db.commit_transaction()
        db.flush()
        db.begin_transaction(False)


    def abort_changes(self):
        """Abort the last changes made in memory.
        """
        if not self._asynchronous:
            raise ValueError, "The transactions are synchronous"
        db = self._db
        db.cancel_transaction()
        self._load_all_internal()
        db.begin_transaction(False)


    #######################################################################
    # API / Public / (Un)Index
    #######################################################################
    def index_document(self, document):
        """Add a new document.
        """
        db = self._db
        metadata = self._metadata
        fields = self._fields

        # Check the input
        if type(document) is dict:
            doc_values = document
        elif isinstance(document, CatalogAware):
            doc_values = document.get_catalog_values()
        else:
            raise ValueError, 'the document must be a CatalogAware object'

        # Make the xapian document
        metadata_modified = False
        xdoc = Document()
        for name, value in doc_values.iteritems():
            field_cls = fields[name]

            # New field ?
            if name not in metadata:
                info = metadata[name] = self._get_info(field_cls, name)
                metadata_modified = True
            else:
                info = metadata[name]

            # A multilingual value ?
            if isinstance(value, dict):
                for language, lang_value in value.iteritems():
                    lang_name = name + '_' + language

                    # New field ?
                    if lang_name not in metadata:
                        lang_info = self._get_info(field_cls, lang_name)
                        lang_info['from'] = name
                        metadata[lang_name] = lang_info
                        metadata_modified = True
                    else:
                        lang_info = metadata[lang_name]

                    # The value can be None
                    if lang_value is not None:
                        # Is stored ?
                        if 'value' in lang_info:
                            xdoc.add_value(lang_info['value'],
                                           _encode(field_cls, lang_value))
                        # Is indexed ?
                        if 'prefix' in lang_info:
                            # Comment: Index twice
                            _index(xdoc, field_cls, lang_value,
                                   info['prefix'], language)
                            _index(xdoc, field_cls, lang_value,
                                   lang_info['prefix'], language)
            # The value can be None
            elif value is not None:
                # Is stored ?
                if 'value' in info:
                    xdoc.add_value(info['value'], _encode(field_cls, value))
                # Is indexed ?
                if 'prefix' in info:
                    # By default language='en'
                    _index(xdoc, field_cls, value, info['prefix'], 'en')

        # Store the key field with the prefix 'Q'
        # Comment: the key field is indexed twice, but we must do it
        #          one => to index (as the others)
        #          two => to index without split
        #          the problem is that "_encode != _index"
        key_field = self._key_field
        if (key_field is None or key_field not in doc_values or
            doc_values[key_field] is None):
            raise ValueError, 'the "key_field" value is compulsory'
        data = _reduce_size(_encode(fields[key_field], doc_values[key_field]))
        xdoc.add_term('Q' + data)

        # TODO: Don't store two documents with the same key field!

        # Save the doc
        db.add_document(xdoc)

        # Store metadata ?
        if metadata_modified:
            db.set_metadata('metadata', dumps(metadata))


    def unindex_document(self, value):
        """Remove the document that has value stored in its key_field.
           If the document does not exist => no error
        """
        key_field = self._key_field
        if key_field is not None:
            data = _reduce_size(_encode(self._fields[key_field], value))
            self._db.delete_document('Q' + data)


    #######################################################################
    # API / Public / Search
    #######################################################################
    def search(self, query=None, **kw):
        """Launch a search in the catalog.
        """
        xquery = _get_xquery(self, query, **kw)
        return SearchResults(self, xquery)


    def get_unique_values(self, name):
        """Return all the terms of a given indexed field
        """
        metadata = self._metadata
        # If there is a problem => an empty result
        if name not in metadata:
            return set()

        # Ok
        prefix = metadata[name]['prefix']
        prefix_len = len(prefix)
        return set([ t.term[prefix_len:] for t in self._db.allterms(prefix) ])


    #######################################################################
    # API / Private
    #######################################################################
    def _get_info(self, field_cls, name):
        info = {}

        # The key field ?
        if getattr(field_cls, 'is_key_field', False):
            if self._key_field is not None:
                raise ValueError, ('You must have only one key field, '
                                   'not multiple, not multilingual')
            if not (field_cls.is_stored and field_cls.is_indexed):
                raise ValueError, ('the key field must be stored '
                                   'and indexed')
            self._key_field = name
            info['key_field'] = True
        # Stored ?
        if getattr(field_cls, 'is_stored', False):
            info['value'] = self._value_nb
            self._value_nb += 1
        # Indexed ?
        if getattr(field_cls, 'is_indexed', False):
            info['prefix'] = _get_prefix(self._prefix_nb)
            self._prefix_nb += 1

        return info


    def _load_all_internal(self):
        """Load the metadata from the database
        """
        self._key_field = None
        self._value_nb = 0
        self._prefix_nb = 0

        metadata = self._db.get_metadata('metadata')
        if metadata == '':
            self._metadata = {}
        else:
            self._metadata = loads(metadata)
            for name, info in self._metadata.iteritems():
                if 'key_field' in info:
                    self._key_field = name
                if 'value' in info:
                    self._value_nb += 1
                if 'prefix' in info:
                    self._prefix_nb += 1


    def _query2xquery(self, query):
        """take a "itools" query and return a "xapian" query
        """
        query_class = query.__class__
        fields = self._fields
        metadata = self._metadata

        # All Query
        if query_class is AllQuery:
            return Query('')

        # PhraseQuery, the field must be indexed
        if query_class is PhraseQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected '%s'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                return Query()
            info = metadata[name]
            try:
                prefix = info['prefix']
            except KeyError:
                raise ValueError, 'the field "%s" must be indexed' % name
            field_cls = _get_field_cls(name, fields, info)
            return _make_PhraseQuery(field_cls, query.value, prefix)

        # RangeQuery, the field must be stored
        if query_class is RangeQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected '%s'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                return Query()

            info = metadata[name]
            value = info['value']
            field_cls = _get_field_cls(name, fields, info)

            left = query.left
            right = query.right

            # Case 1: no limits, return everything
            if left is None and right is None:
                return Query('')

            # Case 2: left limit only
            if right is None:
                return Query(OP_VALUE_GE, value, _encode(field_cls, left))

            # Case 3: right limit only
            if left is None:
                return Query(OP_VALUE_LE, value, _encode(field_cls, right))

            # Case 4: left and right
            return Query(OP_VALUE_RANGE, value, _encode(field_cls, left),
                         _encode(field_cls, right))

        # StartQuery, the field must be stored
        if query_class is StartQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected '%s'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                return Query()

            info = metadata[name]
            value_nb = info['value']
            field_cls = _get_field_cls(name, fields, info)

            value = query.value
            value = _encode(field_cls, value)

            if value:
                # good = {x / x >= value}
                good = Query(OP_VALUE_GE, value_nb, value)

                # Construct the variable end_value:
                # end_value = the word "after" value: toto => totp

                # Delete the '\xff' at the end of value
                end_value = value
                while end_value and ord(end_value[-1]) == 255:
                    end_value = end_value[:-1]

                # Normal case: end_value is not empty
                if end_value:
                    # The world after
                    end_value = end_value[:-1] + chr(ord(end_value[-1]) + 1)

                    # bad = {x / x >= end_value}
                    bad = Query(OP_VALUE_GE, value_nb, end_value)

                    # Return {x / x in good but x not in bad}
                    return Query(OP_AND_NOT, good, bad)
                # If end_value is empty
                else:
                    # Return {x / x in good}
                    return good
            else:
                # If value == '', we return everything
                return Query('')

        # And
        i2x = self._query2xquery
        if query_class is AndQuery:
            return Query(OP_AND, [ i2x(q) for q in query.atoms ])

        # Or
        if query_class is OrQuery:
            return Query(OP_OR, [ i2x(q) for q in query.atoms ])

        # Not
        if query_class is NotQuery:
            return Query(OP_AND_NOT, Query(''), i2x(query.query))