Python WritableDatabase примеры использования

Язык программирования: Python

Пространство имен/Пакет: xapian

Класс/Тип: WritableDatabase

Примеров на hotexamples.com: 13

Python WritableDatabase - 13 примеров найдено. Это лучшие примеры Python кода для xapian.WritableDatabase, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

WritableDatabase(4)

allterms(3)

delete_document(3)

flush(2)

get_metadata(2)

replace_document(2)

add_document(1)

begin_transaction(1)

cancel_transaction(1)

close(1)

commit_transaction(1)

postlist(1)

set_metadata(1)

Пример #1

Показать файл

Файл: catalog.py Проект: kennym/itools

    def __init__(self, ref, fields, read_only=False, asynchronous_mode=True):
        # Load the database
        if isinstance(ref, Database) or isinstance(ref, WritableDatabase):
            self._db = ref
        else:
            path = lfs.get_absolute_path(ref)
            if read_only:
                self._db = Database(path)
            else:
                self._db = WritableDatabase(path, DB_OPEN)

        db = self._db
        self._asynchronous = asynchronous_mode
        self._fields = fields

        # Asynchronous mode
        if not read_only and asynchronous_mode:
            db.begin_transaction(False)

        # Load the xfields from the database
        self._metadata = {}
        self._key_field = None
        self._value_nb = 0
        self._prefix_nb = 0
        self._load_all_internal()

Пример #2

Показать файл

    def __init__(self,
                 ref,
                 fields,
                 read_only=False,
                 asynchronous_mode=True,
                 root=None):
        self.read_only = read_only
        # Load the database
        if isinstance(ref, (Database, WritableDatabase)):
            path = None
            self._db = ref
        else:
            path = lfs.get_absolute_path(ref)
            if read_only:
                self._db = Database(path)
            else:
                self._db = WritableDatabase(path, DB_OPEN)

        db = self._db
        self._asynchronous = asynchronous_mode
        self._fields = fields
        self.root = root
        # FIXME: There's a bug in xapian:
        # Wa cannot get stored values if DB not flushed
        #self.commit_each_transaction = root is None
        self.commit_each_transaction = True
        # Asynchronous mode
        if not read_only and asynchronous_mode:
            db.begin_transaction(self.commit_each_transaction)
        # Set XAPIAN_FLUSH_THRESHOLD
        os.environ["XAPIAN_FLUSH_THRESHOLD"] = "2000"
        # Load the xfields from the database
        self._metadata = {}
        self._value_nb = 0
        self._prefix_nb = 0
        self.transaction_abspaths = []
        self._load_all_internal()
        if not read_only:
            self._init_all_metadata()
        # Catalog log
        if path:
            catalog_log = '{}/catalog.log'.format(path)
            self.logger = CatalogLogger(catalog_log)
            register_logger(self.logger, 'itools.catalog')

Пример #3

Показать файл

 def open_index(self, temp_path=False):
     # callers to open_index must be able to
     # handle an exception -- usually caused by
     # IO errors such as ENOSPC and retry putting
     # the index on a temp_path
     if temp_path:
         try:
             # mark the on-disk index stale
             self._set_index_updated(False)
         except:
             pass
         self._index_path = temp_path
     else:
         self._index_path = self._std_index_path
     try:
         self._database = WritableDatabase(self._index_path,
                                           xapian.DB_CREATE_OR_OPEN)
     except Exception as e:
         logging.error('Exception opening database')
         raise

Пример #4

Показать файл

def make_catalog(uri, fields):
    """Creates a new and empty catalog in the given uri.

    fields must be a dict. It contains some informations about the
    fields in the database. It must contain at least the abspath key field.

    For example:

      fields = {'abspath': String(stored=True, indexed=True),
                'name': Unicode(indexed=True), ...}
    """
    path = lfs.get_absolute_path(uri)
    db = WritableDatabase(path, DB_CREATE)
    return Catalog(db, fields)

Пример #5

Показать файл

    def __init__(self, ref, fields, read_only=False, asynchronous_mode=True):
        # Load the database
        if isinstance(ref, (Database, WritableDatabase)):
            self._db = ref
        else:
            path = lfs.get_absolute_path(ref)
            if read_only:
                self._db = Database(path)
            else:
                self._db = WritableDatabase(path, DB_OPEN)

        db = self._db
        self._asynchronous = asynchronous_mode
        self._fields = fields

        # Asynchronous mode
        if not read_only and asynchronous_mode:
            db.begin_transaction(False)

        # Load the xfields from the database
        self._metadata = {}
        self._value_nb = 0
        self._prefix_nb = 0
        self._load_all_internal()

Пример #6

Показать файл

Файл: catalog.py Проект: hforge/itools

    def __init__(self, ref, fields, read_only=False, asynchronous_mode=True):
        self.read_only = read_only
        # Load the database
        if isinstance(ref, (Database, WritableDatabase)):
            path = None
            self._db = ref
        else:
            path = lfs.get_absolute_path(ref)
            if read_only:
                self._db = Database(path)
            else:
                self._db = WritableDatabase(path, DB_OPEN)

        db = self._db
        self._asynchronous = asynchronous_mode
        self._fields = fields
        # FIXME: There's a bug in xapian:
        # Wa cannot get stored values if DB not flushed
        self.commit_each_transaction = True
        # Asynchronous mode
        if not read_only and asynchronous_mode:
            db.begin_transaction(self.commit_each_transaction)
        # Set XAPIAN_FLUSH_THRESHOLD
        os.environ["XAPIAN_FLUSH_THRESHOLD"] = "2000"
        # Load the xfields from the database
        self._metadata = {}
        self._value_nb = 0
        self._prefix_nb = 0
        self._load_all_internal()
        if not read_only:
            self._init_all_metadata()
        # Catalog log
        if path:
            catalog_log = '{}/catalog.log'.format(path)
            self.logger = CatalogLogger(catalog_log)
            register_logger(self.logger, 'itools.catalog')

Пример #7

Показать файл

Файл: indexstore-with-ers.py Проект: ers-devs/sugar-datastore

 def open_index(self, temp_path=False):
     # callers to open_index must be able to
     # handle an exception -- usually caused by
     # IO errors such as ENOSPC and retry putting
     # the index on a temp_path
     if temp_path:
         try:
             # mark the on-disk index stale
             self._set_index_updated(False)
         except:
             pass
         self._index_path = temp_path
     else:
          self._index_path = self._std_index_path
     try:
          self._database = WritableDatabase(self._index_path,
                                            xapian.DB_CREATE_OR_OPEN)
     except Exception as e:
          logging.error('Exception opening database')
          raise

Пример #8

Показать файл

Файл: indexstore-with-ers.py Проект: ers-devs/sugar-datastore

class IndexStore(object):
    """Index metadata and provide rich query facilities on it.
    """

    def __init__(self):
        self._database = None
        self._flush_timeout = None
        self._pending_writes = 0
        root_path=layoutmanager.get_instance().get_root_path()
        self._index_updated_path = os.path.join(root_path,
                                                'index_updated')
        self._std_index_path = layoutmanager.get_instance().get_index_path()
        self._index_path = self._std_index_path
        
        # Create an instance of ERS
        self._ers = ERS()        

    def open_index(self, temp_path=False):
        # callers to open_index must be able to
        # handle an exception -- usually caused by
        # IO errors such as ENOSPC and retry putting
        # the index on a temp_path
        if temp_path:
            try:
                # mark the on-disk index stale
                self._set_index_updated(False)
            except:
                pass
            self._index_path = temp_path
        else:
             self._index_path = self._std_index_path
        try:
             self._database = WritableDatabase(self._index_path,
                                               xapian.DB_CREATE_OR_OPEN)
        except Exception as e:
             logging.error('Exception opening database')
             raise

    def close_index(self):
        """Close index database if it is open."""
        if not self._database:
            return

        self._flush(True)
        try:
            # does Xapian write in its destructors?
            self._database = None
        except Exception as e:
            logging.error('Exception tearing down database')
            raise

    def remove_index(self):
        if not os.path.exists(self._index_path):
            return
        for f in os.listdir(self._index_path):
            os.remove(os.path.join(self._index_path, f))

    def contains(self, uid):
        '''
        Check if there is a journal entry with the given UID
        '''
        # Name of the entry
        entity_name = layoutmanager.get_instance().get_entity_name(uid)

        # Tells if the UID is in the store or not
        return self._ers.contains_entity(entity_name)

    def store(self, uid, properties):
        '''
        Add a document to the index
        '''
        document = Document()
        document.add_value(_VALUE_UID, uid)
        term_generator = TermGenerator()
        term_generator.index_document(document, properties)
        if not self.contains(uid):
            self._database.add_document(document)
        else:
            self._database.replace_document(_PREFIX_FULL_VALUE + \
                _PREFIX_UID + uid, document)
        
        self._flush(True)

    def find(self, query):
        '''
        Get a list of UIDs matching a query
        '''
        logging.warn("Query " + str(query))
        
        offset = query.pop('offset', 0)
        limit = query.pop('limit', MAX_QUERY_LIMIT)
        order_by = query.pop('order_by', [])
        query_string = query.pop('query', None)

        query_parser = QueryParser()
        query_parser.set_database(self._database)
        parsed_query = query_parser.parse_query(query, query_string)
        
        logging.warn("Parsed query " + str(parsed_query))

        # This will assure that the results count is exact.
        check_at_least = offset + limit + 1

        # TODO : Implement order by
        logging.warn('Unsupported property for sorting: %s', order_by)

        results = self._ers.search('uid', 'blah')
        logging.warn("Found " + str(results))
        
        total_count = 0
        uids = []
        return (uids, total_count)

    def delete(self, uid):
        self._database.delete_document(_PREFIX_FULL_VALUE + _PREFIX_UID + uid)
        self._flush(True)

    def get_activities(self):
        activities = []
        prefix = _PREFIX_FULL_VALUE + _PREFIX_ACTIVITY
        for term in self._database.allterms(prefix):
            activities.append(term.term[len(prefix):])
        return activities

    def flush(self):
        self._flush(True)

    def get_index_updated(self):
        return os.path.exists(self._index_updated_path)

    index_updated = property(get_index_updated)

    def _set_index_updated(self, index_updated):
        if self._std_index_path != self._index_path:
             # operating from tmpfs
             return True
        if index_updated != self.index_updated:
            if index_updated:
                index_updated_file = open(self._index_updated_path, 'w')
                # index_updated = True will happen every
                # indexstore._FLUSH_TIMEOUT seconds, so it is ok to fsync
                os.fsync(index_updated_file.fileno())
                index_updated_file.close()
            else:
                os.remove(self._index_updated_path)

    def _flush_timeout_cb(self):
        self._flush(True)
        return False

    def _flush(self, force=False):
        """Called after any database mutation"""
        logging.debug('IndexStore.flush: force=%r _pending_writes=%r',
                force, self._pending_writes)

        self._set_index_updated(False)

        if self._flush_timeout is not None:
            GObject.source_remove(self._flush_timeout)
            self._flush_timeout = None

        self._pending_writes += 1
        if force or self._pending_writes > _FLUSH_THRESHOLD:
            try:
                logging.debug("Start database flush")
                self._database.flush()
                logging.debug("Completed database flush")
            except Exception, e:
                logging.exception(e)
                logging.error("Exception during database.flush()")
                # bail out to trigger a reindex
                sys.exit(1)
            self._pending_writes = 0
            self._set_index_updated(True)
        else:

Пример #9

Показать файл

class IndexStore(object):
    """Index metadata and provide rich query facilities on it.
    """
    def __init__(self):
        self._database = None
        self._flush_timeout = None
        self._pending_writes = 0
        root_path = layoutmanager.get_instance().get_root_path()
        self._index_updated_path = os.path.join(root_path, 'index_updated')
        self._std_index_path = layoutmanager.get_instance().get_index_path()
        self._index_path = self._std_index_path

    def open_index(self, temp_path=False):
        # callers to open_index must be able to
        # handle an exception -- usually caused by
        # IO errors such as ENOSPC and retry putting
        # the index on a temp_path
        if temp_path:
            try:
                # mark the on-disk index stale
                self._set_index_updated(False)
            except:
                pass
            self._index_path = temp_path
        else:
            self._index_path = self._std_index_path
        try:
            self._database = WritableDatabase(self._index_path,
                                              xapian.DB_CREATE_OR_OPEN)
        except Exception as e:
            logging.error('Exception opening database')
            raise

    def close_index(self):
        """Close index database if it is open."""
        if not self._database:
            return

        self._flush(True)
        try:
            # does Xapian write in its destructors?
            self._database = None
        except Exception as e:
            logging.error('Exception tearing down database')
            raise

    def remove_index(self):
        if not os.path.exists(self._index_path):
            return
        for f in os.listdir(self._index_path):
            os.remove(os.path.join(self._index_path, f))

    def contains(self, uid):
        postings = self._database.postlist(_PREFIX_FULL_VALUE + \
            _PREFIX_UID + uid)
        try:
            __ = postings.next()
        except StopIteration:
            return False
        return True

    def store(self, uid, properties):
        document = Document()
        document.add_value(_VALUE_UID, uid)
        term_generator = TermGenerator()
        term_generator.index_document(document, properties)

        if not self.contains(uid):
            self._database.add_document(document)
        else:
            self._database.replace_document(_PREFIX_FULL_VALUE + \
                _PREFIX_UID + uid, document)

        self._flush(True)

    def find(self, query):
        offset = query.pop('offset', 0)
        limit = query.pop('limit', MAX_QUERY_LIMIT)
        order_by = query.pop('order_by', [])
        query_string = query.pop('query', None)

        query_parser = QueryParser()
        query_parser.set_database(self._database)
        enquire = Enquire(self._database)
        enquire.set_query(query_parser.parse_query(query, query_string))

        # This will assure that the results count is exact.
        check_at_least = offset + limit + 1

        if not order_by:
            order_by = '+timestamp'
        else:
            order_by = order_by[0]

        if order_by == '+timestamp':
            enquire.set_sort_by_value(_VALUE_TIMESTAMP, True)
        elif order_by == '-timestamp':
            enquire.set_sort_by_value(_VALUE_TIMESTAMP, False)
        elif order_by == '+title':
            enquire.set_sort_by_value(_VALUE_TITLE, True)
        elif order_by == '-title':
            enquire.set_sort_by_value(_VALUE_TITLE, False)
        elif order_by == '+filesize':
            enquire.set_sort_by_value(_VALUE_FILESIZE, True)
        elif order_by == '-filesize':
            enquire.set_sort_by_value(_VALUE_FILESIZE, False)
        elif order_by == '+creation_time':
            enquire.set_sort_by_value(_VALUE_CREATION_TIME, True)
        elif order_by == '-creation_time':
            enquire.set_sort_by_value(_VALUE_CREATION_TIME, False)
        else:
            logging.warning('Unsupported property for sorting: %s', order_by)

        query_result = enquire.get_mset(offset, limit, check_at_least)
        total_count = query_result.get_matches_estimated()

        uids = []
        for hit in query_result:
            uids.append(hit.document.get_value(_VALUE_UID))

        return (uids, total_count)

    def delete(self, uid):
        self._database.delete_document(_PREFIX_FULL_VALUE + _PREFIX_UID + uid)
        self._flush(True)

    def get_activities(self):
        activities = []
        prefix = _PREFIX_FULL_VALUE + _PREFIX_ACTIVITY
        for term in self._database.allterms(prefix):
            activities.append(term.term[len(prefix):])
        return activities

    def flush(self):
        self._flush(True)

    def get_index_updated(self):
        return os.path.exists(self._index_updated_path)

    index_updated = property(get_index_updated)

    def _set_index_updated(self, index_updated):
        if self._std_index_path != self._index_path:
            # operating from tmpfs
            return True
        if index_updated != self.index_updated:
            if index_updated:
                index_updated_file = open(self._index_updated_path, 'w')
                # index_updated = True will happen every
                # indexstore._FLUSH_TIMEOUT seconds, so it is ok to fsync
                os.fsync(index_updated_file.fileno())
                index_updated_file.close()
            else:
                os.remove(self._index_updated_path)

    def _flush_timeout_cb(self):
        self._flush(True)
        return False

    def _flush(self, force=False):
        """Called after any database mutation"""
        logging.debug('IndexStore.flush: force=%r _pending_writes=%r', force,
                      self._pending_writes)

        self._set_index_updated(False)

        if self._flush_timeout is not None:
            GLib.source_remove(self._flush_timeout)
            self._flush_timeout = None

        self._pending_writes += 1
        if force or self._pending_writes > _FLUSH_THRESHOLD:
            try:
                logging.debug("Start database flush")
                self._database.flush()
                logging.debug("Completed database flush")
            except Exception, e:
                logging.exception(e)
                logging.error("Exception during database.flush()")
                # bail out to trigger a reindex
                sys.exit(1)
            self._pending_writes = 0
            self._set_index_updated(True)
        else:

Пример #10

Показать файл

class IndexStore(object):
    """Index metadata and provide rich query facilities on it.
    """

    def __init__(self):
        self._database = None
        self._flush_timeout = None
        self._pending_writes = 0
        root_path=layoutmanager.get_instance().get_root_path()
        self._index_updated_path = os.path.join(root_path,
                                                'index_updated')
        self._std_index_path = layoutmanager.get_instance().get_index_path()
        self._index_path = self._std_index_path

    def open_index(self, temp_path=False):
        # callers to open_index must be able to
        # handle an exception -- usually caused by
        # IO errors such as ENOSPC and retry putting
        # the index on a temp_path
        if temp_path:
            try:
                # mark the on-disk index stale
                self._set_index_updated(False)
            except:
                pass
            self._index_path = temp_path
        else:
             self._index_path = self._std_index_path
        try:
             self._database = WritableDatabase(self._index_path,
                                               xapian.DB_CREATE_OR_OPEN)
        except Exception as e:
             logging.error('Exception opening database')
             raise

    def close_index(self):
        """Close index database if it is open."""
        if not self._database:
            return

        self._flush(True)
        try:
            # does Xapian write in its destructors?
            self._database = None
        except Exception as e:
            logging.error('Exception tearing down database')
            raise

    def remove_index(self):
        if not os.path.exists(self._index_path):
            return
        for f in os.listdir(self._index_path):
            os.remove(os.path.join(self._index_path, f))

    def contains(self, uid):
        postings = self._database.postlist(_PREFIX_FULL_VALUE + \
            _PREFIX_UID + uid)
        try:
            __ = postings.next()
        except StopIteration:
            return False
        return True

    def store(self, uid, properties):
        document = Document()
        document.add_value(_VALUE_UID, uid)
        term_generator = TermGenerator()
        term_generator.index_document(document, properties)

        if not self.contains(uid):
            self._database.add_document(document)
        else:
            self._database.replace_document(_PREFIX_FULL_VALUE + \
                _PREFIX_UID + uid, document)

        self._flush(True)

    def find(self, query):
        offset = query.pop('offset', 0)
        limit = query.pop('limit', MAX_QUERY_LIMIT)
        order_by = query.pop('order_by', [])
        query_string = query.pop('query', None)

        query_parser = QueryParser()
        query_parser.set_database(self._database)
        enquire = Enquire(self._database)
        enquire.set_query(query_parser.parse_query(query, query_string))

        # This will assure that the results count is exact.
        check_at_least = offset + limit + 1

        if not order_by:
            order_by = '+timestamp'
        else:
            order_by = order_by[0]

        if order_by == '+timestamp':
            enquire.set_sort_by_value(_VALUE_TIMESTAMP, True)
        elif order_by == '-timestamp':
            enquire.set_sort_by_value(_VALUE_TIMESTAMP, False)
        elif order_by == '+title':
            enquire.set_sort_by_value(_VALUE_TITLE, True)
        elif order_by == '-title':
            enquire.set_sort_by_value(_VALUE_TITLE, False)
        elif order_by == '+filesize':
            enquire.set_sort_by_value(_VALUE_FILESIZE, True)
        elif order_by == '-filesize':
            enquire.set_sort_by_value(_VALUE_FILESIZE, False)
        elif order_by == '+creation_time':
            enquire.set_sort_by_value(_VALUE_CREATION_TIME, True)
        elif order_by == '-creation_time':
            enquire.set_sort_by_value(_VALUE_CREATION_TIME, False)
        else:
            logging.warning('Unsupported property for sorting: %s', order_by)

        query_result = enquire.get_mset(offset, limit, check_at_least)
        total_count = query_result.get_matches_estimated()

        uids = []
        for hit in query_result:
            uids.append(hit.document.get_value(_VALUE_UID))

        return (uids, total_count)

    def delete(self, uid):
        self._database.delete_document(_PREFIX_FULL_VALUE + _PREFIX_UID + uid)
        self._flush(True)

    def get_activities(self):
        activities = []
        prefix = _PREFIX_FULL_VALUE + _PREFIX_ACTIVITY
        for term in self._database.allterms(prefix):
            activities.append(term.term[len(prefix):])
        return activities

    def flush(self):
        self._flush(True)

    def get_index_updated(self):
        return os.path.exists(self._index_updated_path)

    index_updated = property(get_index_updated)

    def _set_index_updated(self, index_updated):
        if self._std_index_path != self._index_path:
             # operating from tmpfs
             return True
        if index_updated != self.index_updated:
            if index_updated:
                index_updated_file = open(self._index_updated_path, 'w')
                # index_updated = True will happen every
                # indexstore._FLUSH_TIMEOUT seconds, so it is ok to fsync
                os.fsync(index_updated_file.fileno())
                index_updated_file.close()
            else:
                os.remove(self._index_updated_path)

    def _flush_timeout_cb(self):
        self._flush(True)
        return False

    def _flush(self, force=False):
        """Called after any database mutation"""
        logging.debug('IndexStore.flush: force=%r _pending_writes=%r',
                force, self._pending_writes)

        self._set_index_updated(False)

        if self._flush_timeout is not None:
            GObject.source_remove(self._flush_timeout)
            self._flush_timeout = None

        self._pending_writes += 1
        if force or self._pending_writes > _FLUSH_THRESHOLD:
            try:
                logging.debug("Start database flush")
                self._database.flush()
                logging.debug("Completed database flush")
            except Exception, e:
                logging.exception(e)
                logging.error("Exception during database.flush()")
                # bail out to trigger a reindex
                sys.exit(1)
            self._pending_writes = 0
            self._set_index_updated(True)
        else:

Пример #11

Показать файл

Файл: catalog.py Проект: nkhine/itools

class Catalog(object):

    nb_changes = 0
    logger = None
    _db = None
    read_only = False

    def __init__(self, ref, fields, read_only=False, asynchronous_mode=True):
        self.read_only = read_only
        # Load the database
        if isinstance(ref, (Database, WritableDatabase)):
            path = None
            self._db = ref
        else:
            path = lfs.get_absolute_path(ref)
            if read_only:
                self._db = Database(path)
            else:
                self._db = WritableDatabase(path, DB_OPEN)

        db = self._db
        self._asynchronous = asynchronous_mode
        self._fields = fields
        # FIXME: There's a bug in xapian:
        # Wa cannot get stored values if DB not flushed
        self.commit_each_transaction = True
        # Asynchronous mode
        if not read_only and asynchronous_mode:
            db.begin_transaction(self.commit_each_transaction)
        # Set XAPIAN_FLUSH_THRESHOLD
        os.environ["XAPIAN_FLUSH_THRESHOLD"] = "2000"
        # Load the xfields from the database
        self._metadata = {}
        self._value_nb = 0
        self._prefix_nb = 0
        self._load_all_internal()
        if not read_only:
            self._init_all_metadata()
        # Catalog log
        if path:
            catalog_log = '{}/catalog.log'.format(path)
            self.logger = CatalogLogger(catalog_log)
            register_logger(self.logger, 'itools.catalog')



    def _init_all_metadata(self):
        """Init new metadata (to avoid 'field is not indexed' warning)
        """
        has_changes = False
        metadata = self._metadata
        for name, field_cls in self._fields.items():
            if name not in metadata:
                print('[Catalog] New field registered: {0}'.format(name))
                has_changes = True
                metadata[name] = self._get_info(field_cls, name)
            else:
                # If the field was in the catalog but is newly stored
                if (not metadata[name].has_key('value') and
                    getattr(field_cls, 'stored', False)):
                    print('[Catalog] Indexed field is now stored: {0}'.format(name))
                    has_changes = True
                    metadata[name] = merge_dicts(
                        metadata[name],
                        self._get_info_stored())
                # If the field was stored in the catalog but is newly indexed
                if (not metadata[name].has_key('prefix') and
                    getattr(field_cls, 'indexed', False)):
                    print('[Catalog] Stored field is now indexed: {0}'.format(name))
                    has_changes = True
                    metadata[name] = merge_dicts(
                        metadata[name],
                        self._get_info_indexed())
        if has_changes:
            self._db.set_metadata('metadata', dumps(metadata))
            self._db.commit_transaction()
            self._db.begin_transaction(self.commit_each_transaction)


    #######################################################################
    # API / Public / Transactions
    #######################################################################
    def save_changes(self):
        """Save the last changes to disk.
        """
        if not self._asynchronous:
            raise ValueError, "The transactions are synchronous"
        db = self._db
        db.commit_transaction()
        db.commit()
        # FIXME: There's a bug in xapian:
        # Wa cannot get stored values if DB not flushed
        #if self.nb_changes > 200:
        #    # XXX Not working since cancel_transaction()
        #    # cancel all transactions not commited to disk
        #    # We have to use new strategy to abort transaction
        #    db.commit()
        #    if self.logger:
        #        self.logger.clear()
        #    self.nb_changes = 0
        db.begin_transaction(self.commit_each_transaction)


    def abort_changes(self):
        """Abort the last changes made in memory.
        """
        if not self._asynchronous:
            raise ValueError, "The transactions are synchronous"
        db = self._db
        if self.commit_each_transaction:
            db.cancel_transaction()
            db.begin_transaction(self.commit_each_transaction)
        else:
            raise NotImplementedError
        self._load_all_internal()


    def close(self):
        if self._db is None:
            msg = 'Catalog is already closed'
            print(msg)
            return
        if self.read_only:
            self._db.close()
            self._db = None
            return
        if self.commit_each_transaction:
            try:
                self._db.cancel_transaction()
            except:
                print('Warning: cannot cancel xapian transaction')
                self._db.close()
                self._db = None
            else:
                self._db.close()
                self._db = None
        else:
            self.abort_changes()
            self._db.commit_transaction()
            self._db.flush()
            self._db.close()
            self._db = None
        if self.logger:
            self.logger.clear()


    #######################################################################
    # API / Public / (Un)Index
    #######################################################################
    def index_document(self, document):
        self.nb_changes += 1
        abspath, term, xdoc = self.get_xdoc_from_document(document)
        self._db.replace_document(term, xdoc)
        if self.logger:
            log_info(abspath, domain='itools.catalog')


    def unindex_document(self, abspath):
        """Remove the document that has value stored in its abspath.
           If the document does not exist => no error
        """
        self.nb_changes += 1
        data = _reduce_size(_encode(self._fields['abspath'], abspath))
        self._db.delete_document('Q' + data)
        if self.logger:
            log_info(abspath, domain='itools.catalog')


    def get_xdoc_from_document(self, doc_values):
        """Return (abspath, term, xdoc) from the document (resource or values as dict)
        """
        term = None
        metadata = self._metadata
        # Check the input
        if type(doc_values) is not dict:
            raise NotImplementedError('Deprecated: doc_values should be a dict')
        fields = self._fields
        abspath = doc_values['abspath']
        # Make the xapian document
        metadata_modified = False
        xdoc = Document()
        for name, value in doc_values.iteritems():
            if name not in fields:
                warn_not_indexed_nor_stored(name)
            field_cls = fields[name]

            # New field ?
            if name not in metadata:
                info = metadata[name] = self._get_info(field_cls, name)
                metadata_modified = True
            else:
                info = metadata[name]

            # XXX This comment is no longer valid, now the key field is
            #     always abspath with field_cls = String
            # Store the key field with the prefix 'Q'
            # Comment: the key field is indexed twice, but we must do it
            #          one => to index (as the others)
            #          two => to index without split
            #          the problem is that "_encode != _index"
            if name == 'abspath':
                key_value = _reduce_size(_encode(field_cls, value))
                term = 'Q' + key_value
                xdoc.add_term(term)

            # A multilingual value?
            if isinstance(value, dict):
                for language, lang_value in value.iteritems():
                    lang_name = name + '_' + language

                    # New field ?
                    if lang_name not in metadata:
                        lang_info = self._get_info(field_cls, lang_name)
                        lang_info['from'] = name
                        metadata[lang_name] = lang_info
                        metadata_modified = True
                    else:
                        lang_info = metadata[lang_name]

                    # The value can be None
                    if lang_value is not None:
                        # Is stored ?
                        if 'value' in lang_info:
                            xdoc.add_value(lang_info['value'],
                                           _encode(field_cls, lang_value))
                        # Is indexed ?
                        if 'prefix' in lang_info:
                            # Comment: Index twice
                            _index(xdoc, field_cls, lang_value,
                                   info['prefix'], language)
                            _index(xdoc, field_cls, lang_value,
                                   lang_info['prefix'], language)
            # The value can be None
            elif value is not None:
                # Is stored ?
                if 'value' in info:
                    xdoc.add_value(info['value'], _encode(field_cls, value))
                # Is indexed ?
                if 'prefix' in info:
                    # By default language='en'
                    _index(xdoc, field_cls, value, info['prefix'], 'en')
        # Store metadata ?
        if metadata_modified:
            metadata = self._metadata
            self._db.set_metadata('metadata', dumps(metadata))
        # Ok
        return abspath, term, xdoc

    #######################################################################
    # API / Public / Search
    #######################################################################
    def get_unique_values(self, name):
        """Return all the terms of a given indexed field
        """
        metadata = self._metadata
        # If there is a problem => an empty result
        if name not in metadata:
            warn_not_indexed(name)
            return set()

        # Ok
        prefix = metadata[name]['prefix']
        prefix_len = len(prefix)
        return set([ t.term[prefix_len:] for t in self._db.allterms(prefix) ])


    #######################################################################
    # API / Private
    #######################################################################
    def _get_info(self, field_cls, name):
        # The key field ?
        if name == 'abspath':
            if not (issubclass(field_cls, String) and
                    field_cls.stored and
                    field_cls.indexed):
                raise ValueError, ('the abspath field must be declared as '
                                   'String(stored=True, indexed=True)')
        # Stored ?
        info = {}
        if getattr(field_cls, 'stored', False):
            info = self._get_info_stored()
        # Indexed ?
        if getattr(field_cls, 'indexed', False):
            info = merge_dicts(info, self._get_info_indexed())
        # Ok
        return info


    def _get_info_stored(self):
        value = self._value_nb
        self._value_nb += 1
        return {'value': value}


    def _get_info_indexed(self):
        prefix = _get_prefix(self._prefix_nb)
        self._prefix_nb += 1
        return {'prefix': prefix}



    def _load_all_internal(self):
        """Load the metadata from the database
        """
        self._value_nb = 0
        self._prefix_nb = 0

        metadata = self._db.get_metadata('metadata')
        if metadata == '':
            self._metadata = {}
        else:
            self._metadata = loads(metadata)
            for name, info in self._metadata.iteritems():
                if 'value' in info:
                    self._value_nb += 1
                if 'prefix' in info:
                    self._prefix_nb += 1


    def _query2xquery(self, query):
        """take a "itools" query and return a "xapian" query
        """
        query_class = type(query)
        fields = self._fields
        metadata = self._metadata

        # All Query
        if query_class is AllQuery:
            return Query('')

        # PhraseQuery, the field must be indexed
        if query_class is PhraseQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected '%s'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()
            info = metadata[name]
            try:
                prefix = info['prefix']
            except KeyError:
                raise ValueError, 'the field "%s" must be indexed' % name
            field_cls = _get_field_cls(name, fields, info)
            return _make_PhraseQuery(field_cls, query.value, prefix)

        # RangeQuery, the field must be stored
        if query_class is RangeQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected '%s'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()

            info = metadata[name]
            value = info.get('value')
            if value is None:
                raise AttributeError, MSG_NOT_STORED.format(name=name)
            field_cls = _get_field_cls(name, fields, info)
            if field_cls.multiple:
                error = 'range-query not supported on multiple fields'
                raise ValueError, error

            left = query.left
            if left is not None:
                left = _encode_simple_value(field_cls, left)

            right = query.right
            if right is not None:
                right = _encode_simple_value(field_cls, right)

            # Case 1: no limits, return everything
            if left is None and right is None:
                return Query('')

            # Case 2: left limit only
            if right is None:
                return Query(OP_VALUE_GE, value, left)

            # Case 3: right limit only
            if left is None:
                return Query(OP_VALUE_LE, value, right)

            # Case 4: left and right
            return Query(OP_VALUE_RANGE, value, left, right)

        # StartQuery, the field must be stored
        if query_class is StartQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected '%s'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()

            info = metadata[name]
            value_nb = info.get('value')
            if value_nb is None:
                raise AttributeError, MSG_NOT_STORED.format(name=name)
            field_cls = _get_field_cls(name, fields, info)

            value = query.value
            value = _encode(field_cls, value)

            if value:
                # good = {x / x >= value}
                good = Query(OP_VALUE_GE, value_nb, value)

                # Construct the variable end_value:
                # end_value = the word "after" value: toto => totp

                # Delete the '\xff' at the end of value
                end_value = value
                while end_value and ord(end_value[-1]) == 255:
                    end_value = end_value[:-1]

                # Normal case: end_value is not empty
                if end_value:
                    # The world after
                    end_value = end_value[:-1] + chr(ord(end_value[-1]) + 1)

                    # bad = {x / x >= end_value}
                    bad = Query(OP_VALUE_GE, value_nb, end_value)

                    # Return {x / x in good but x not in bad}
                    return Query(OP_AND_NOT, good, bad)
                # If end_value is empty
                else:
                    # Return {x / x in good}
                    return good
            else:
                # If value == '', we return everything
                return Query('')

        # TextQuery, the field must be indexed
        if query_class is TextQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected %s for 'name'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()

            info = metadata[name]
            field_cls = _get_field_cls(name, fields, info)
            try:
                prefix = info['prefix']
            except KeyError:
                raise ValueError, 'the field "%s" must be indexed' % name

            # Remove accents from the value
            value = query.value
            if type(value) is not unicode:
                raise TypeError, "unexpected %s for 'value'" % type(value)
            value = value.translate(TRANSLATE_MAP)

            qp = QueryParser()
            qp.set_database(self._db)
            return qp.parse_query(_encode(field_cls, value), TQ_FLAGS, prefix)

        i2x = self._query2xquery
        # Multiple query with single atom
        if isinstance(query, _MultipleQuery) and len(query.atoms) == 1:
            return i2x(query.atoms[0])

        # And
        if query_class is _AndQuery:
            return Query(OP_AND, [ i2x(q) for q in query.atoms ])

        # Or
        if query_class is _OrQuery:
            return Query(OP_OR, [ i2x(q) for q in query.atoms ])

        # Not
        if query_class is NotQuery:
            return Query(OP_AND_NOT, Query(''), i2x(query.query))

Пример #12

Показать файл

class Catalog(object):

    def __init__(self, ref, fields, read_only=False, asynchronous_mode=True):
        # Load the database
        if isinstance(ref, (Database, WritableDatabase)):
            self._db = ref
        else:
            path = lfs.get_absolute_path(ref)
            if read_only:
                self._db = Database(path)
            else:
                self._db = WritableDatabase(path, DB_OPEN)

        db = self._db
        self._asynchronous = asynchronous_mode
        self._fields = fields

        # Asynchronous mode
        if not read_only and asynchronous_mode:
            db.begin_transaction(False)

        # Load the xfields from the database
        self._metadata = {}
        self._value_nb = 0
        self._prefix_nb = 0
        self._load_all_internal()


    #######################################################################
    # API / Public / Transactions
    #######################################################################
    def save_changes(self):
        """Save the last changes to disk.
        """
        if not self._asynchronous:
            raise ValueError, "The transactions are synchronous"
        db = self._db
        db.commit_transaction()
        db.flush()
        db.begin_transaction(False)


    def abort_changes(self):
        """Abort the last changes made in memory.
        """
        if not self._asynchronous:
            raise ValueError, "The transactions are synchronous"
        db = self._db
        db.cancel_transaction()
        self._load_all_internal()
        db.begin_transaction(False)


    #######################################################################
    # API / Public / (Un)Index
    #######################################################################
    def index_document(self, document):
        """Add a new document.
        """
        db = self._db
        metadata = self._metadata
        fields = self._fields

        # Check the input
        if type(document) is dict:
            doc_values = document
        else:
            doc_values = document.get_catalog_values()

        # Make the xapian document
        metadata_modified = False
        xdoc = Document()
        for name, value in doc_values.iteritems():
            if name not in fields:
                warn_not_indexed_nor_stored(name)
            field_cls = fields[name]

            # New field ?
            if name not in metadata:
                info = metadata[name] = self._get_info(field_cls, name)
                metadata_modified = True
            else:
                info = metadata[name]

            # XXX This comment is no longer valid, now the key field is
            #     always abspath with field_cls = String
            # Store the key field with the prefix 'Q'
            # Comment: the key field is indexed twice, but we must do it
            #          one => to index (as the others)
            #          two => to index without split
            #          the problem is that "_encode != _index"
            if name == 'abspath':
                key_value = _reduce_size(_encode(field_cls, value))
                xdoc.add_term('Q' + key_value)

            # A multilingual value?
            if isinstance(value, dict):
                for language, lang_value in value.iteritems():
                    lang_name = name + '_' + language

                    # New field ?
                    if lang_name not in metadata:
                        lang_info = self._get_info(field_cls, lang_name)
                        lang_info['from'] = name
                        metadata[lang_name] = lang_info
                        metadata_modified = True
                    else:
                        lang_info = metadata[lang_name]

                    # The value can be None
                    if lang_value is not None:
                        # Is stored ?
                        if 'value' in lang_info:
                            xdoc.add_value(lang_info['value'],
                                           _encode(field_cls, lang_value))
                        # Is indexed ?
                        if 'prefix' in lang_info:
                            # Comment: Index twice
                            _index(xdoc, field_cls, lang_value,
                                   info['prefix'], language)
                            _index(xdoc, field_cls, lang_value,
                                   lang_info['prefix'], language)
            # The value can be None
            elif value is not None:
                # Is stored ?
                if 'value' in info:
                    xdoc.add_value(info['value'], _encode(field_cls, value))
                # Is indexed ?
                if 'prefix' in info:
                    # By default language='en'
                    _index(xdoc, field_cls, value, info['prefix'], 'en')

        # TODO: Don't store two documents with the same key field!

        # Save the doc
        db.add_document(xdoc)

        # Store metadata ?
        if metadata_modified:
            db.set_metadata('metadata', dumps(metadata))


    def unindex_document(self, abspath):
        """Remove the document that has value stored in its abspath.
           If the document does not exist => no error
        """
        data = _reduce_size(_encode(self._fields['abspath'], abspath))
        self._db.delete_document('Q' + data)


    #######################################################################
    # API / Public / Search
    #######################################################################
    def get_unique_values(self, name):
        """Return all the terms of a given indexed field
        """
        metadata = self._metadata
        # If there is a problem => an empty result
        if name not in metadata:
            warn_not_indexed(name)
            return set()

        # Ok
        prefix = metadata[name]['prefix']
        prefix_len = len(prefix)
        return set([ t.term[prefix_len:] for t in self._db.allterms(prefix) ])


    #######################################################################
    # API / Private
    #######################################################################
    def _get_info(self, field_cls, name):
        # The key field ?
        if name == 'abspath':
            if not (issubclass(field_cls, String) and
                    field_cls.stored and
                    field_cls.indexed):
                raise ValueError, ('the abspath field must be declared as '
                                   'String(stored=True, indexed=True)')
        # Stored ?
        info = {}
        if getattr(field_cls, 'stored', False):
            info['value'] = self._value_nb
            self._value_nb += 1
        # Indexed ?
        if getattr(field_cls, 'indexed', False):
            info['prefix'] = _get_prefix(self._prefix_nb)
            self._prefix_nb += 1

        return info


    def _load_all_internal(self):
        """Load the metadata from the database
        """
        self._value_nb = 0
        self._prefix_nb = 0

        metadata = self._db.get_metadata('metadata')
        if metadata == '':
            self._metadata = {}
        else:
            self._metadata = loads(metadata)
            for name, info in self._metadata.iteritems():
                if 'value' in info:
                    self._value_nb += 1
                if 'prefix' in info:
                    self._prefix_nb += 1


    def _query2xquery(self, query):
        """take a "itools" query and return a "xapian" query
        """
        query_class = type(query)
        fields = self._fields
        metadata = self._metadata

        # All Query
        if query_class is AllQuery:
            return Query('')

        # PhraseQuery, the field must be indexed
        if query_class is PhraseQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected '%s'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()
            info = metadata[name]
            try:
                prefix = info['prefix']
            except KeyError:
                raise ValueError, 'the field "%s" must be indexed' % name
            field_cls = _get_field_cls(name, fields, info)
            return _make_PhraseQuery(field_cls, query.value, prefix)

        # RangeQuery, the field must be stored
        if query_class is RangeQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected '%s'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()

            info = metadata[name]
            value = info.get('value')
            if value is None:
                raise AttributeError, MSG_NOT_STORED.format(name=name)
            field_cls = _get_field_cls(name, fields, info)
            if field_cls.multiple:
                error = 'range-query not supported on multiple fields'
                raise ValueError, error

            left = query.left
            if left is not None:
                left = _encode_simple_value(field_cls, left)

            right = query.right
            if right is not None:
                right = _encode_simple_value(field_cls, right)

            # Case 1: no limits, return everything
            if left is None and right is None:
                return Query('')

            # Case 2: left limit only
            if right is None:
                return Query(OP_VALUE_GE, value, left)

            # Case 3: right limit only
            if left is None:
                return Query(OP_VALUE_LE, value, right)

            # Case 4: left and right
            return Query(OP_VALUE_RANGE, value, left, right)

        # StartQuery, the field must be stored
        if query_class is StartQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected '%s'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()

            info = metadata[name]
            value_nb = info.get('value')
            if value_nb is None:
                raise AttributeError, MSG_NOT_STORED.format(name=name)
            field_cls = _get_field_cls(name, fields, info)

            value = query.value
            value = _encode(field_cls, value)

            if value:
                # good = {x / x >= value}
                good = Query(OP_VALUE_GE, value_nb, value)

                # Construct the variable end_value:
                # end_value = the word "after" value: toto => totp

                # Delete the '\xff' at the end of value
                end_value = value
                while end_value and ord(end_value[-1]) == 255:
                    end_value = end_value[:-1]

                # Normal case: end_value is not empty
                if end_value:
                    # The world after
                    end_value = end_value[:-1] + chr(ord(end_value[-1]) + 1)

                    # bad = {x / x >= end_value}
                    bad = Query(OP_VALUE_GE, value_nb, end_value)

                    # Return {x / x in good but x not in bad}
                    return Query(OP_AND_NOT, good, bad)
                # If end_value is empty
                else:
                    # Return {x / x in good}
                    return good
            else:
                # If value == '', we return everything
                return Query('')

        # TextQuery, the field must be indexed
        if query_class is TextQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected %s for 'name'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()

            info = metadata[name]
            field_cls = _get_field_cls(name, fields, info)
            try:
                prefix = info['prefix']
            except KeyError:
                raise ValueError, 'the field "%s" must be indexed' % name

            # Remove accents from the value
            value = query.value
            if type(value) is not unicode:
                raise TypeError, "unexpected %s for 'value'" % type(value)
            value = value.translate(TRANSLATE_MAP)

            qp = QueryParser()
            qp.set_database(self._db)
            return qp.parse_query(_encode(field_cls, value), TQ_FLAGS, prefix)

        i2x = self._query2xquery
        # Multiple query with single atom
        if isinstance(query, _MultipleQuery) and len(query.atoms) == 1:
            return i2x(query.atoms[0])

        # And
        if query_class is _AndQuery:
            return Query(OP_AND, [ i2x(q) for q in query.atoms ])

        # Or
        if query_class is _OrQuery:
            return Query(OP_OR, [ i2x(q) for q in query.atoms ])

        # Not
        if query_class is NotQuery:
            return Query(OP_AND_NOT, Query(''), i2x(query.query))

Пример #13

Показать файл

Файл: catalog.py Проект: kennym/itools

class Catalog(object):

    def __init__(self, ref, fields, read_only=False, asynchronous_mode=True):
        # Load the database
        if isinstance(ref, Database) or isinstance(ref, WritableDatabase):
            self._db = ref
        else:
            path = lfs.get_absolute_path(ref)
            if read_only:
                self._db = Database(path)
            else:
                self._db = WritableDatabase(path, DB_OPEN)

        db = self._db
        self._asynchronous = asynchronous_mode
        self._fields = fields

        # Asynchronous mode
        if not read_only and asynchronous_mode:
            db.begin_transaction(False)

        # Load the xfields from the database
        self._metadata = {}
        self._key_field = None
        self._value_nb = 0
        self._prefix_nb = 0
        self._load_all_internal()


    #######################################################################
    # API / Public / Transactions
    #######################################################################
    def save_changes(self):
        """Save the last changes to disk.
        """
        if not self._asynchronous:
            raise ValueError, "The transactions are synchronous"
        db = self._db
        db.commit_transaction()
        db.flush()
        db.begin_transaction(False)


    def abort_changes(self):
        """Abort the last changes made in memory.
        """
        if not self._asynchronous:
            raise ValueError, "The transactions are synchronous"
        db = self._db
        db.cancel_transaction()
        self._load_all_internal()
        db.begin_transaction(False)


    #######################################################################
    # API / Public / (Un)Index
    #######################################################################
    def index_document(self, document):
        """Add a new document.
        """
        db = self._db
        metadata = self._metadata
        fields = self._fields

        # Check the input
        if type(document) is dict:
            doc_values = document
        elif isinstance(document, CatalogAware):
            doc_values = document.get_catalog_values()
        else:
            raise ValueError, 'the document must be a CatalogAware object'

        # Make the xapian document
        metadata_modified = False
        xdoc = Document()
        for name, value in doc_values.iteritems():
            field_cls = fields[name]

            # New field ?
            if name not in metadata:
                info = metadata[name] = self._get_info(field_cls, name)
                metadata_modified = True
            else:
                info = metadata[name]

            # A multilingual value ?
            if isinstance(value, dict):
                for language, lang_value in value.iteritems():
                    lang_name = name + '_' + language

                    # New field ?
                    if lang_name not in metadata:
                        lang_info = self._get_info(field_cls, lang_name)
                        lang_info['from'] = name
                        metadata[lang_name] = lang_info
                        metadata_modified = True
                    else:
                        lang_info = metadata[lang_name]

                    # The value can be None
                    if lang_value is not None:
                        # Is stored ?
                        if 'value' in lang_info:
                            xdoc.add_value(lang_info['value'],
                                           _encode(field_cls, lang_value))
                        # Is indexed ?
                        if 'prefix' in lang_info:
                            # Comment: Index twice
                            _index(xdoc, field_cls, lang_value,
                                   info['prefix'], language)
                            _index(xdoc, field_cls, lang_value,
                                   lang_info['prefix'], language)
            # The value can be None
            elif value is not None:
                # Is stored ?
                if 'value' in info:
                    xdoc.add_value(info['value'], _encode(field_cls, value))
                # Is indexed ?
                if 'prefix' in info:
                    # By default language='en'
                    _index(xdoc, field_cls, value, info['prefix'], 'en')

        # Store the key field with the prefix 'Q'
        # Comment: the key field is indexed twice, but we must do it
        #          one => to index (as the others)
        #          two => to index without split
        #          the problem is that "_encode != _index"
        key_field = self._key_field
        if (key_field is None or key_field not in doc_values or
            doc_values[key_field] is None):
            raise ValueError, 'the "key_field" value is compulsory'
        data = _reduce_size(_encode(fields[key_field], doc_values[key_field]))
        xdoc.add_term('Q' + data)

        # TODO: Don't store two documents with the same key field!

        # Save the doc
        db.add_document(xdoc)

        # Store metadata ?
        if metadata_modified:
            db.set_metadata('metadata', dumps(metadata))


    def unindex_document(self, value):
        """Remove the document that has value stored in its key_field.
           If the document does not exist => no error
        """
        key_field = self._key_field
        if key_field is not None:
            data = _reduce_size(_encode(self._fields[key_field], value))
            self._db.delete_document('Q' + data)


    #######################################################################
    # API / Public / Search
    #######################################################################
    def search(self, query=None, **kw):
        """Launch a search in the catalog.
        """
        xquery = _get_xquery(self, query, **kw)
        return SearchResults(self, xquery)


    def get_unique_values(self, name):
        """Return all the terms of a given indexed field
        """
        metadata = self._metadata
        # If there is a problem => an empty result
        if name not in metadata:
            return set()

        # Ok
        prefix = metadata[name]['prefix']
        prefix_len = len(prefix)
        return set([ t.term[prefix_len:] for t in self._db.allterms(prefix) ])


    #######################################################################
    # API / Private
    #######################################################################
    def _get_info(self, field_cls, name):
        info = {}

        # The key field ?
        if getattr(field_cls, 'is_key_field', False):
            if self._key_field is not None:
                raise ValueError, ('You must have only one key field, '
                                   'not multiple, not multilingual')
            if not (field_cls.is_stored and field_cls.is_indexed):
                raise ValueError, ('the key field must be stored '
                                   'and indexed')
            self._key_field = name
            info['key_field'] = True
        # Stored ?
        if getattr(field_cls, 'is_stored', False):
            info['value'] = self._value_nb
            self._value_nb += 1
        # Indexed ?
        if getattr(field_cls, 'is_indexed', False):
            info['prefix'] = _get_prefix(self._prefix_nb)
            self._prefix_nb += 1

        return info


    def _load_all_internal(self):
        """Load the metadata from the database
        """
        self._key_field = None
        self._value_nb = 0
        self._prefix_nb = 0

        metadata = self._db.get_metadata('metadata')
        if metadata == '':
            self._metadata = {}
        else:
            self._metadata = loads(metadata)
            for name, info in self._metadata.iteritems():
                if 'key_field' in info:
                    self._key_field = name
                if 'value' in info:
                    self._value_nb += 1
                if 'prefix' in info:
                    self._prefix_nb += 1


    def _query2xquery(self, query):
        """take a "itools" query and return a "xapian" query
        """
        query_class = query.__class__
        fields = self._fields
        metadata = self._metadata

        # All Query
        if query_class is AllQuery:
            return Query('')

        # PhraseQuery, the field must be indexed
        if query_class is PhraseQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected '%s'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                return Query()
            info = metadata[name]
            try:
                prefix = info['prefix']
            except KeyError:
                raise ValueError, 'the field "%s" must be indexed' % name
            field_cls = _get_field_cls(name, fields, info)
            return _make_PhraseQuery(field_cls, query.value, prefix)

        # RangeQuery, the field must be stored
        if query_class is RangeQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected '%s'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                return Query()

            info = metadata[name]
            value = info['value']
            field_cls = _get_field_cls(name, fields, info)

            left = query.left
            right = query.right

            # Case 1: no limits, return everything
            if left is None and right is None:
                return Query('')

            # Case 2: left limit only
            if right is None:
                return Query(OP_VALUE_GE, value, _encode(field_cls, left))

            # Case 3: right limit only
            if left is None:
                return Query(OP_VALUE_LE, value, _encode(field_cls, right))

            # Case 4: left and right
            return Query(OP_VALUE_RANGE, value, _encode(field_cls, left),
                         _encode(field_cls, right))

        # StartQuery, the field must be stored
        if query_class is StartQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError, "unexpected '%s'" % type(name)
            # If there is a problem => an empty result
            if name not in metadata:
                return Query()

            info = metadata[name]
            value_nb = info['value']
            field_cls = _get_field_cls(name, fields, info)

            value = query.value
            value = _encode(field_cls, value)

            if value:
                # good = {x / x >= value}
                good = Query(OP_VALUE_GE, value_nb, value)

                # Construct the variable end_value:
                # end_value = the word "after" value: toto => totp

                # Delete the '\xff' at the end of value
                end_value = value
                while end_value and ord(end_value[-1]) == 255:
                    end_value = end_value[:-1]

                # Normal case: end_value is not empty
                if end_value:
                    # The world after
                    end_value = end_value[:-1] + chr(ord(end_value[-1]) + 1)

                    # bad = {x / x >= end_value}
                    bad = Query(OP_VALUE_GE, value_nb, end_value)

                    # Return {x / x in good but x not in bad}
                    return Query(OP_AND_NOT, good, bad)
                # If end_value is empty
                else:
                    # Return {x / x in good}
                    return good
            else:
                # If value == '', we return everything
                return Query('')

        # And
        i2x = self._query2xquery
        if query_class is AndQuery:
            return Query(OP_AND, [ i2x(q) for q in query.atoms ])

        # Or
        if query_class is OrQuery:
            return Query(OP_OR, [ i2x(q) for q in query.atoms ])

        # Not
        if query_class is NotQuery:
            return Query(OP_AND_NOT, Query(''), i2x(query.query))