class Catalog(object):
    """Wrapper around a xapian database that indexes and stores documents.

    The catalog keeps, next to the documents, a 'metadata' mapping that
    associates every field name with the xapian value slot ('value') and/or
    term prefix ('prefix') allocated to it.
    """

    def __init__(self, ref, fields, read_only=False, asynchronous_mode=True):
        """Open (or wrap) the xapian database and load its metadata.

        ref -- an already opened Database/WritableDatabase, or a reference
               resolved to a path with "lfs.get_absolute_path"
        fields -- mapping {field name: field class}
        read_only -- open the database without write access
        asynchronous_mode -- if True (and not read_only) a transaction is
               opened right away; changes are then only saved by
               "save_changes"
        """
        # Load the database
        if isinstance(ref, (Database, WritableDatabase)):
            self._db = ref
        else:
            path = lfs.get_absolute_path(ref)
            if read_only:
                self._db = Database(path)
            else:
                self._db = WritableDatabase(path, DB_OPEN)

        db = self._db
        self._asynchronous = asynchronous_mode
        self._fields = fields
        # Asynchronous mode
        if not read_only and asynchronous_mode:
            db.begin_transaction(False)
        # Load the xfields from the database
        self._metadata = {}
        self._value_nb = 0
        self._prefix_nb = 0
        self._load_all_internal()

    #######################################################################
    # API / Public / Transactions
    #######################################################################
    def save_changes(self):
        """Save the last changes to disk.

        Commit the current transaction, flush, and open a new transaction.
        Raise ValueError if the catalog is not in asynchronous mode.
        """
        if not self._asynchronous:
            raise ValueError("The transactions are synchronous")
        db = self._db
        db.commit_transaction()
        db.flush()
        db.begin_transaction(False)

    def abort_changes(self):
        """Abort the last changes made in memory.

        Cancel the current transaction, reload the metadata from the
        database and open a new transaction.
        Raise ValueError if the catalog is not in asynchronous mode.
        """
        if not self._asynchronous:
            raise ValueError("The transactions are synchronous")
        db = self._db
        db.cancel_transaction()
        # The in-memory metadata may hold fields registered since the last
        # commit: go back to what the database knows
        self._load_all_internal()
        db.begin_transaction(False)

    #######################################################################
    # API / Public / (Un)Index
    #######################################################################
    def index_document(self, document):
        """Add a new document.

        document -- either a dict {field name: value}, or an object that
                    provides "get_catalog_values()" returning such a dict

        A value that is itself a dict is handled as a multilingual value
        {language: value}. Names that are not declared in the catalog
        fields are reported with "warn_not_indexed_nor_stored" and skipped.
        """
        db = self._db
        metadata = self._metadata
        fields = self._fields

        # Check the input
        if type(document) is dict:
            doc_values = document
        else:
            doc_values = document.get_catalog_values()

        # Make the xapian document
        metadata_modified = False
        xdoc = Document()
        for name, value in doc_values.iteritems():
            if name not in fields:
                # Unknown field: warn and skip it (there is no field class
                # to encode the value with)
                warn_not_indexed_nor_stored(name)
                continue
            field_cls = fields[name]

            # New field ?
            if name not in metadata:
                info = metadata[name] = self._get_info(field_cls, name)
                metadata_modified = True
            else:
                info = metadata[name]

            # XXX This comment is no longer valid, now the key field is
            # always abspath with field_cls = String
            # Store the key field with the prefix 'Q'
            # Comment: the key field is indexed twice, but we must do it
            #          one => to index (as the others)
            #          two => to index without split
            #          the problem is that "_encode != _index"
            if name == 'abspath':
                key_value = _reduce_size(_encode(field_cls, value))
                xdoc.add_term('Q' + key_value)

            # A multilingual value?
            if isinstance(value, dict):
                for language, lang_value in value.iteritems():
                    lang_name = name + '_' + language

                    # New field ?
                    if lang_name not in metadata:
                        lang_info = self._get_info(field_cls, lang_name)
                        lang_info['from'] = name
                        metadata[lang_name] = lang_info
                        metadata_modified = True
                    else:
                        lang_info = metadata[lang_name]

                    # The value can be None
                    if lang_value is not None:
                        # Is stored ?
                        if 'value' in lang_info:
                            xdoc.add_value(lang_info['value'],
                                           _encode(field_cls, lang_value))
                        # Is indexed ?
                        if 'prefix' in lang_info:
                            # Comment: Index twice (under the prefix of the
                            # field, and under the prefix of the language
                            # specific field)
                            _index(xdoc, field_cls, lang_value,
                                   info['prefix'], language)
                            _index(xdoc, field_cls, lang_value,
                                   lang_info['prefix'], language)
            # The value can be None
            elif value is not None:
                # Is stored ?
                if 'value' in info:
                    xdoc.add_value(info['value'], _encode(field_cls, value))
                # Is indexed ?
                if 'prefix' in info:
                    # By default language='en'
                    _index(xdoc, field_cls, value, info['prefix'], 'en')

        # TODO: Don't store two documents with the same key field!

        # Save the doc
        db.add_document(xdoc)

        # Store metadata ?
        if metadata_modified:
            db.set_metadata('metadata', dumps(metadata))

    def unindex_document(self, abspath):
        """Remove the document that has value stored in its abspath.
           If the document does not exist => no error
        """
        data = _reduce_size(_encode(self._fields['abspath'], abspath))
        self._db.delete_document('Q' + data)

    #######################################################################
    # API / Public / Search
    #######################################################################
    def get_unique_values(self, name):
        """Return all the terms of a given indexed field

        An unknown field is reported with "warn_not_indexed" and gives an
        empty set. A KeyError is raised if the field is known but has no
        'prefix' entry in the metadata.
        """
        metadata = self._metadata
        # If there is a problem => an empty result
        if name not in metadata:
            warn_not_indexed(name)
            return set()

        # Ok
        prefix = metadata[name]['prefix']
        prefix_len = len(prefix)
        return set(t.term[prefix_len:] for t in self._db.allterms(prefix))

    #######################################################################
    # API / Private
    #######################################################################
    def _get_info(self, field_cls, name):
        """Allocate and return the metadata entry of a new field.

        The entry gets a 'value' slot number if the field class is stored
        and a 'prefix' if it is indexed. Raise ValueError if 'abspath' is
        not declared as String(stored=True, indexed=True).
        """
        # The key field ?
        if name == 'abspath':
            if not (issubclass(field_cls, String) and
                    field_cls.stored and
                    field_cls.indexed):
                raise ValueError('the abspath field must be declared as '
                                 'String(stored=True, indexed=True)')
        # Stored ?
        info = {}
        if getattr(field_cls, 'stored', False):
            info['value'] = self._value_nb
            self._value_nb += 1
        # Indexed ?
        if getattr(field_cls, 'indexed', False):
            info['prefix'] = _get_prefix(self._prefix_nb)
            self._prefix_nb += 1

        return info

    def _load_all_internal(self):
        """Load the metadata from the database

        The value/prefix counters are reset to the number of entries that
        already own a 'value' / a 'prefix'.
        """
        self._value_nb = 0
        self._prefix_nb = 0

        metadata = self._db.get_metadata('metadata')
        if metadata == '':
            self._metadata = {}
        else:
            self._metadata = loads(metadata)
            for name, info in self._metadata.iteritems():
                if 'value' in info:
                    self._value_nb += 1
                if 'prefix' in info:
                    self._prefix_nb += 1

    def _query2xquery(self, query):
        """take a "itools" query and return a "xapian" query

        A query on a field unknown to the metadata is reported with
        "warn_not_indexed" and gives an empty xapian query. Nothing is
        returned (None) for a query class that is not handled below.
        """
        query_class = type(query)
        fields = self._fields
        metadata = self._metadata

        # All Query
        if query_class is AllQuery:
            return Query('')

        # PhraseQuery, the field must be indexed
        if query_class is PhraseQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError("unexpected '%s'" % type(name))
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()
            info = metadata[name]
            try:
                prefix = info['prefix']
            except KeyError:
                raise ValueError('the field "%s" must be indexed' % name)
            field_cls = _get_field_cls(name, fields, info)
            return _make_PhraseQuery(field_cls, query.value, prefix)

        # RangeQuery, the field must be stored
        if query_class is RangeQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError("unexpected '%s'" % type(name))
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()

            info = metadata[name]
            value = info.get('value')
            if value is None:
                raise AttributeError(MSG_NOT_STORED.format(name=name))
            field_cls = _get_field_cls(name, fields, info)
            if field_cls.multiple:
                error = 'range-query not supported on multiple fields'
                raise ValueError(error)

            left = query.left
            if left is not None:
                left = _encode_simple_value(field_cls, left)
            right = query.right
            if right is not None:
                right = _encode_simple_value(field_cls, right)

            # Case 1: no limits, return everything
            if left is None and right is None:
                return Query('')

            # Case 2: left limit only
            if right is None:
                return Query(OP_VALUE_GE, value, left)

            # Case 3: right limit only
            if left is None:
                return Query(OP_VALUE_LE, value, right)

            # Case 4: left and right
            return Query(OP_VALUE_RANGE, value, left, right)

        # StartQuery, the field must be stored
        if query_class is StartQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError("unexpected '%s'" % type(name))
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()

            info = metadata[name]
            value_nb = info.get('value')
            if value_nb is None:
                raise AttributeError(MSG_NOT_STORED.format(name=name))
            field_cls = _get_field_cls(name, fields, info)

            value = query.value
            value = _encode(field_cls, value)

            if value:
                # good = {x / x >= value}
                good = Query(OP_VALUE_GE, value_nb, value)

                # Construct the variable end_value:
                # end_value = the word "after" value: toto => totp

                # Delete the '\xff' at the end of value
                end_value = value
                while end_value and ord(end_value[-1]) == 255:
                    end_value = end_value[:-1]

                # Normal case: end_value is not empty
                if end_value:
                    # The word after
                    end_value = (end_value[:-1] +
                                 chr(ord(end_value[-1]) + 1))

                    # bad = {x / x >= end_value}
                    bad = Query(OP_VALUE_GE, value_nb, end_value)

                    # Return {x / x in good but x not in bad}
                    return Query(OP_AND_NOT, good, bad)
                # If end_value is empty
                else:
                    # Return {x / x in good}
                    return good
            else:
                # If value == '', we return everything
                return Query('')

        # TextQuery, the field must be indexed
        if query_class is TextQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError("unexpected %s for 'name'" % type(name))
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()

            info = metadata[name]
            field_cls = _get_field_cls(name, fields, info)
            try:
                prefix = info['prefix']
            except KeyError:
                raise ValueError('the field "%s" must be indexed' % name)

            # Remove accents from the value
            value = query.value
            if type(value) is not unicode:
                raise TypeError("unexpected %s for 'value'" % type(value))
            value = value.translate(TRANSLATE_MAP)

            qp = QueryParser()
            qp.set_database(self._db)
            return qp.parse_query(_encode(field_cls, value), TQ_FLAGS,
                                  prefix)

        i2x = self._query2xquery
        # Multiple query with single atom
        if isinstance(query, _MultipleQuery) and len(query.atoms) == 1:
            return i2x(query.atoms[0])

        # And
        if query_class is _AndQuery:
            return Query(OP_AND, [ i2x(q) for q in query.atoms ])

        # Or
        if query_class is _OrQuery:
            return Query(OP_OR, [ i2x(q) for q in query.atoms ])

        # Not
        if query_class is NotQuery:
            return Query(OP_AND_NOT, Query(''), i2x(query.query))
class Catalog(object):
    """Wrapper around a xapian database that indexes and stores documents.

    The catalog keeps, next to the documents, a 'metadata' mapping that
    associates every field name with the xapian value slot ('value') and/or
    term prefix ('prefix') allocated to it.
    """

    # Number of (un)index operations done through this catalog
    nb_changes = 0
    # CatalogLogger, only set when the catalog is opened from a path
    logger = None
    # The xapian database, None once the catalog is closed
    _db = None
    read_only = False

    def __init__(self, ref, fields, read_only=False, asynchronous_mode=True):
        """Open (or wrap) the xapian database and load its metadata.

        ref -- an already opened Database/WritableDatabase, or a reference
               resolved to a path with "lfs.get_absolute_path"
        fields -- mapping {field name: field class}
        read_only -- open the database without write access
        asynchronous_mode -- if True (and not read_only) a transaction is
               opened right away; changes are then only saved by
               "save_changes"
        """
        self.read_only = read_only
        # Load the database
        if isinstance(ref, (Database, WritableDatabase)):
            path = None
            self._db = ref
        else:
            path = lfs.get_absolute_path(ref)
            if read_only:
                self._db = Database(path)
            else:
                self._db = WritableDatabase(path, DB_OPEN)

        db = self._db
        self._asynchronous = asynchronous_mode
        self._fields = fields
        # FIXME: There's a bug in xapian:
        # We cannot get stored values if DB not flushed
        self.commit_each_transaction = True
        # Asynchronous mode
        if not read_only and asynchronous_mode:
            db.begin_transaction(self.commit_each_transaction)
        # Set XAPIAN_FLUSH_THRESHOLD
        # NOTE(review): this is set after the database has been opened,
        # confirm that xapian reads the variable late enough for it to
        # apply to this database
        os.environ["XAPIAN_FLUSH_THRESHOLD"] = "2000"
        # Load the xfields from the database
        self._metadata = {}
        self._value_nb = 0
        self._prefix_nb = 0
        self._load_all_internal()
        if not read_only:
            self._init_all_metadata()
        # Catalog log
        if path:
            catalog_log = '{}/catalog.log'.format(path)
            self.logger = CatalogLogger(catalog_log)
            register_logger(self.logger, 'itools.catalog')

    def _init_all_metadata(self):
        """Init new metadata (to avoid 'field is not indexed' warning)

        Register every declared field that is unknown to the metadata, and
        complete the entries of the fields that became stored or indexed.
        The metadata is written to the database only if something changed.
        """
        has_changes = False
        metadata = self._metadata
        for name, field_cls in self._fields.items():
            if name not in metadata:
                print('[Catalog] New field registered: {0}'.format(name))
                has_changes = True
                metadata[name] = self._get_info(field_cls, name)
            else:
                # If the field was in the catalog but is newly stored
                if ('value' not in metadata[name] and
                        getattr(field_cls, 'stored', False)):
                    print('[Catalog] Indexed field is now stored: {0}'.format(name))
                    has_changes = True
                    metadata[name] = merge_dicts(
                        metadata[name],
                        self._get_info_stored())
                # If the field was stored in the catalog but is newly indexed
                if ('prefix' not in metadata[name] and
                        getattr(field_cls, 'indexed', False)):
                    print('[Catalog] Stored field is now indexed: {0}'.format(name))
                    has_changes = True
                    metadata[name] = merge_dicts(
                        metadata[name],
                        self._get_info_indexed())
        if has_changes:
            self._db.set_metadata('metadata', dumps(metadata))
            # A transaction is only open in asynchronous mode (see
            # __init__): there is nothing to commit otherwise
            if self._asynchronous:
                self._db.commit_transaction()
                self._db.begin_transaction(self.commit_each_transaction)

    #######################################################################
    # API / Public / Transactions
    #######################################################################
    def save_changes(self):
        """Save the last changes to disk.

        Commit the current transaction, commit the database, and open a new
        transaction.
        Raise ValueError if the catalog is not in asynchronous mode.
        """
        if not self._asynchronous:
            raise ValueError("The transactions are synchronous")
        db = self._db
        db.commit_transaction()
        # FIXME: There's a bug in xapian:
        # We cannot get stored values if DB not flushed
        # So the database is committed every time. Committing only every
        # 200 changes (see "nb_changes") was tried, but it is not working
        # since cancel_transaction() cancels all the transactions not
        # committed to disk: a new strategy is needed to abort transactions.
        db.commit()
        db.begin_transaction(self.commit_each_transaction)

    def abort_changes(self):
        """Abort the last changes made in memory.

        Cancel the current transaction, open a new one and reload the
        metadata from the database.
        Raise ValueError if the catalog is not in asynchronous mode, and
        NotImplementedError if "commit_each_transaction" is not set.
        """
        if not self._asynchronous:
            raise ValueError("The transactions are synchronous")
        db = self._db
        if self.commit_each_transaction:
            db.cancel_transaction()
            db.begin_transaction(self.commit_each_transaction)
        else:
            raise NotImplementedError
        self._load_all_internal()

    def close(self):
        """Close the database, the pending changes are not saved.

        Closing a catalog that is already closed only prints a message.
        """
        if self._db is None:
            msg = 'Catalog is already closed'
            print(msg)
            return
        if self.read_only:
            self._db.close()
            self._db = None
            return
        if self.commit_each_transaction:
            # Best effort: there may be no transaction to cancel. Only
            # regular errors are ignored, KeyboardInterrupt and SystemExit
            # are not swallowed.
            try:
                self._db.cancel_transaction()
            except Exception:
                print('Warning: cannot cancel xapian transaction')
            self._db.close()
            self._db = None
        else:
            # NOTE abort_changes raises NotImplementedError when
            # "commit_each_transaction" is not set
            self.abort_changes()
            self._db.commit_transaction()
            self._db.flush()
            self._db.close()
            self._db = None
        if self.logger:
            self.logger.clear()

    #######################################################################
    # API / Public / (Un)Index
    #######################################################################
    def index_document(self, document):
        """Add the document, replace the one with the same abspath if any.

        document -- dict {field name: value}, with an 'abspath' key
        """
        self.nb_changes += 1
        abspath, term, xdoc = self.get_xdoc_from_document(document)
        self._db.replace_document(term, xdoc)
        if self.logger:
            log_info(abspath, domain='itools.catalog')

    def unindex_document(self, abspath):
        """Remove the document that has value stored in its abspath.
           If the document does not exist => no error
        """
        self.nb_changes += 1
        data = _reduce_size(_encode(self._fields['abspath'], abspath))
        self._db.delete_document('Q' + data)
        if self.logger:
            log_info(abspath, domain='itools.catalog')

    def get_xdoc_from_document(self, doc_values):
        """Return (abspath, term, xdoc) from the document (resource or
        values as dict)

        doc_values -- dict {field name: value}, anything else raises
                      NotImplementedError; the 'abspath' key is required

        "term" is the key term ('Q' + reduced abspath) of the document, it
        is None if the 'abspath' value has not been processed. A value that
        is itself a dict is handled as a multilingual value
        {language: value}. Names that are not declared in the catalog
        fields are reported with "warn_not_indexed_nor_stored" and skipped.
        New fields are registered in the metadata, which is then written to
        the database.
        """
        term = None
        metadata = self._metadata
        # Check the input
        if type(doc_values) is not dict:
            raise NotImplementedError('Deprecated: doc_values should be a dict')
        fields = self._fields
        abspath = doc_values['abspath']
        # Make the xapian document
        metadata_modified = False
        xdoc = Document()
        for name, value in doc_values.iteritems():
            if name not in fields:
                # Unknown field: warn and skip it (there is no field class
                # to encode the value with)
                warn_not_indexed_nor_stored(name)
                continue
            field_cls = fields[name]

            # New field ?
            if name not in metadata:
                info = metadata[name] = self._get_info(field_cls, name)
                metadata_modified = True
            else:
                info = metadata[name]

            # XXX This comment is no longer valid, now the key field is
            # always abspath with field_cls = String
            # Store the key field with the prefix 'Q'
            # Comment: the key field is indexed twice, but we must do it
            #          one => to index (as the others)
            #          two => to index without split
            #          the problem is that "_encode != _index"
            if name == 'abspath':
                key_value = _reduce_size(_encode(field_cls, value))
                term = 'Q' + key_value
                xdoc.add_term(term)

            # A multilingual value?
            if isinstance(value, dict):
                for language, lang_value in value.iteritems():
                    lang_name = name + '_' + language

                    # New field ?
                    if lang_name not in metadata:
                        lang_info = self._get_info(field_cls, lang_name)
                        lang_info['from'] = name
                        metadata[lang_name] = lang_info
                        metadata_modified = True
                    else:
                        lang_info = metadata[lang_name]

                    # The value can be None
                    if lang_value is not None:
                        # Is stored ?
                        if 'value' in lang_info:
                            xdoc.add_value(lang_info['value'],
                                           _encode(field_cls, lang_value))
                        # Is indexed ?
                        if 'prefix' in lang_info:
                            # Comment: Index twice (under the prefix of the
                            # field, and under the prefix of the language
                            # specific field)
                            _index(xdoc, field_cls, lang_value,
                                   info['prefix'], language)
                            _index(xdoc, field_cls, lang_value,
                                   lang_info['prefix'], language)
            # The value can be None
            elif value is not None:
                # Is stored ?
                if 'value' in info:
                    xdoc.add_value(info['value'], _encode(field_cls, value))
                # Is indexed ?
                if 'prefix' in info:
                    # By default language='en'
                    _index(xdoc, field_cls, value, info['prefix'], 'en')
        # Store metadata ?
        if metadata_modified:
            self._db.set_metadata('metadata', dumps(metadata))
        # Ok
        return abspath, term, xdoc

    #######################################################################
    # API / Public / Search
    #######################################################################
    def get_unique_values(self, name):
        """Return all the terms of a given indexed field

        An unknown field is reported with "warn_not_indexed" and gives an
        empty set. A KeyError is raised if the field is known but has no
        'prefix' entry in the metadata.
        """
        metadata = self._metadata
        # If there is a problem => an empty result
        if name not in metadata:
            warn_not_indexed(name)
            return set()

        # Ok
        prefix = metadata[name]['prefix']
        prefix_len = len(prefix)
        return set(t.term[prefix_len:] for t in self._db.allterms(prefix))

    #######################################################################
    # API / Private
    #######################################################################
    def _get_info(self, field_cls, name):
        """Allocate and return the metadata entry of a new field.

        The entry gets a 'value' slot number if the field class is stored
        and a 'prefix' if it is indexed. Raise ValueError if 'abspath' is
        not declared as String(stored=True, indexed=True).
        """
        # The key field ?
        if name == 'abspath':
            if not (issubclass(field_cls, String) and
                    field_cls.stored and
                    field_cls.indexed):
                raise ValueError('the abspath field must be declared as '
                                 'String(stored=True, indexed=True)')
        # Stored ?
        info = {}
        if getattr(field_cls, 'stored', False):
            info = self._get_info_stored()
        # Indexed ?
        if getattr(field_cls, 'indexed', False):
            info = merge_dicts(info, self._get_info_indexed())
        # Ok
        return info

    def _get_info_stored(self):
        """Allocate the next value slot, return it as {'value': slot}.
        """
        value = self._value_nb
        self._value_nb += 1
        return {'value': value}

    def _get_info_indexed(self):
        """Allocate the next term prefix, return it as {'prefix': prefix}.
        """
        prefix = _get_prefix(self._prefix_nb)
        self._prefix_nb += 1
        return {'prefix': prefix}

    def _load_all_internal(self):
        """Load the metadata from the database

        The value/prefix counters are reset to the number of entries that
        already own a 'value' / a 'prefix'.
        """
        self._value_nb = 0
        self._prefix_nb = 0

        metadata = self._db.get_metadata('metadata')
        if metadata == '':
            self._metadata = {}
        else:
            self._metadata = loads(metadata)
            for name, info in self._metadata.iteritems():
                if 'value' in info:
                    self._value_nb += 1
                if 'prefix' in info:
                    self._prefix_nb += 1

    def _query2xquery(self, query):
        """take a "itools" query and return a "xapian" query

        A query on a field unknown to the metadata is reported with
        "warn_not_indexed" and gives an empty xapian query. Nothing is
        returned (None) for a query class that is not handled below.
        """
        query_class = type(query)
        fields = self._fields
        metadata = self._metadata

        # All Query
        if query_class is AllQuery:
            return Query('')

        # PhraseQuery, the field must be indexed
        if query_class is PhraseQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError("unexpected '%s'" % type(name))
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()
            info = metadata[name]
            try:
                prefix = info['prefix']
            except KeyError:
                raise ValueError('the field "%s" must be indexed' % name)
            field_cls = _get_field_cls(name, fields, info)
            return _make_PhraseQuery(field_cls, query.value, prefix)

        # RangeQuery, the field must be stored
        if query_class is RangeQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError("unexpected '%s'" % type(name))
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()

            info = metadata[name]
            value = info.get('value')
            if value is None:
                raise AttributeError(MSG_NOT_STORED.format(name=name))
            field_cls = _get_field_cls(name, fields, info)
            if field_cls.multiple:
                error = 'range-query not supported on multiple fields'
                raise ValueError(error)

            left = query.left
            if left is not None:
                left = _encode_simple_value(field_cls, left)
            right = query.right
            if right is not None:
                right = _encode_simple_value(field_cls, right)

            # Case 1: no limits, return everything
            if left is None and right is None:
                return Query('')

            # Case 2: left limit only
            if right is None:
                return Query(OP_VALUE_GE, value, left)

            # Case 3: right limit only
            if left is None:
                return Query(OP_VALUE_LE, value, right)

            # Case 4: left and right
            return Query(OP_VALUE_RANGE, value, left, right)

        # StartQuery, the field must be stored
        if query_class is StartQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError("unexpected '%s'" % type(name))
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()

            info = metadata[name]
            value_nb = info.get('value')
            if value_nb is None:
                raise AttributeError(MSG_NOT_STORED.format(name=name))
            field_cls = _get_field_cls(name, fields, info)

            value = query.value
            value = _encode(field_cls, value)

            if value:
                # good = {x / x >= value}
                good = Query(OP_VALUE_GE, value_nb, value)

                # Construct the variable end_value:
                # end_value = the word "after" value: toto => totp

                # Delete the '\xff' at the end of value
                end_value = value
                while end_value and ord(end_value[-1]) == 255:
                    end_value = end_value[:-1]

                # Normal case: end_value is not empty
                if end_value:
                    # The word after
                    end_value = (end_value[:-1] +
                                 chr(ord(end_value[-1]) + 1))

                    # bad = {x / x >= end_value}
                    bad = Query(OP_VALUE_GE, value_nb, end_value)

                    # Return {x / x in good but x not in bad}
                    return Query(OP_AND_NOT, good, bad)
                # If end_value is empty
                else:
                    # Return {x / x in good}
                    return good
            else:
                # If value == '', we return everything
                return Query('')

        # TextQuery, the field must be indexed
        if query_class is TextQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError("unexpected %s for 'name'" % type(name))
            # If there is a problem => an empty result
            if name not in metadata:
                warn_not_indexed(name)
                return Query()

            info = metadata[name]
            field_cls = _get_field_cls(name, fields, info)
            try:
                prefix = info['prefix']
            except KeyError:
                raise ValueError('the field "%s" must be indexed' % name)

            # Remove accents from the value
            value = query.value
            if type(value) is not unicode:
                raise TypeError("unexpected %s for 'value'" % type(value))
            value = value.translate(TRANSLATE_MAP)

            qp = QueryParser()
            qp.set_database(self._db)
            return qp.parse_query(_encode(field_cls, value), TQ_FLAGS,
                                  prefix)

        i2x = self._query2xquery
        # Multiple query with single atom
        if isinstance(query, _MultipleQuery) and len(query.atoms) == 1:
            return i2x(query.atoms[0])

        # And
        if query_class is _AndQuery:
            return Query(OP_AND, [ i2x(q) for q in query.atoms ])

        # Or
        if query_class is _OrQuery:
            return Query(OP_OR, [ i2x(q) for q in query.atoms ])

        # Not
        if query_class is NotQuery:
            return Query(OP_AND_NOT, Query(''), i2x(query.query))
class Catalog(object):
    """Wrapper around a xapian database that indexes and stores documents.

    The catalog keeps, next to the documents, a 'metadata' mapping that
    associates every field name with the xapian value slot ('value') and/or
    term prefix ('prefix') allocated to it, and that flags the key field
    ('key_field').
    """

    def __init__(self, ref, fields, read_only=False, asynchronous_mode=True):
        """Open (or wrap) the xapian database and load its metadata.

        ref -- an already opened Database/WritableDatabase, or a reference
               resolved to a path with "lfs.get_absolute_path"
        fields -- mapping {field name: field class}
        read_only -- open the database without write access
        asynchronous_mode -- if True (and not read_only) a transaction is
               opened right away; changes are then only saved by
               "save_changes"
        """
        # Load the database
        if isinstance(ref, (Database, WritableDatabase)):
            self._db = ref
        else:
            path = lfs.get_absolute_path(ref)
            if read_only:
                self._db = Database(path)
            else:
                self._db = WritableDatabase(path, DB_OPEN)

        db = self._db
        self._asynchronous = asynchronous_mode
        self._fields = fields
        # Asynchronous mode
        if not read_only and asynchronous_mode:
            db.begin_transaction(False)
        # Load the xfields from the database
        self._metadata = {}
        self._key_field = None
        self._value_nb = 0
        self._prefix_nb = 0
        self._load_all_internal()

    #######################################################################
    # API / Public / Transactions
    #######################################################################
    def save_changes(self):
        """Save the last changes to disk.

        Commit the current transaction, flush, and open a new transaction.
        Raise ValueError if the catalog is not in asynchronous mode.
        """
        if not self._asynchronous:
            raise ValueError("The transactions are synchronous")
        db = self._db
        db.commit_transaction()
        db.flush()
        db.begin_transaction(False)

    def abort_changes(self):
        """Abort the last changes made in memory.

        Cancel the current transaction, reload the metadata from the
        database and open a new transaction.
        Raise ValueError if the catalog is not in asynchronous mode.
        """
        if not self._asynchronous:
            raise ValueError("The transactions are synchronous")
        db = self._db
        db.cancel_transaction()
        # The in-memory metadata may hold fields registered since the last
        # commit: go back to what the database knows
        self._load_all_internal()
        db.begin_transaction(False)

    #######################################################################
    # API / Public / (Un)Index
    #######################################################################
    def index_document(self, document):
        """Add a new document.

        document -- either a dict {field name: value}, or a CatalogAware
                    object (its "get_catalog_values()" gives the dict);
                    anything else raises ValueError

        A value that is itself a dict is handled as a multilingual value
        {language: value}. Raise ValueError if the document has no value
        for the key field.
        """
        db = self._db
        metadata = self._metadata
        fields = self._fields

        # Check the input
        if type(document) is dict:
            doc_values = document
        elif isinstance(document, CatalogAware):
            doc_values = document.get_catalog_values()
        else:
            raise ValueError('the document must be a CatalogAware object')

        # Make the xapian document
        metadata_modified = False
        xdoc = Document()
        for name, value in doc_values.iteritems():
            field_cls = fields[name]

            # New field ?
            if name not in metadata:
                info = metadata[name] = self._get_info(field_cls, name)
                metadata_modified = True
            else:
                info = metadata[name]

            # A multilingual value ?
            if isinstance(value, dict):
                for language, lang_value in value.iteritems():
                    lang_name = name + '_' + language

                    # New field ?
                    if lang_name not in metadata:
                        lang_info = self._get_info(field_cls, lang_name)
                        lang_info['from'] = name
                        metadata[lang_name] = lang_info
                        metadata_modified = True
                    else:
                        lang_info = metadata[lang_name]

                    # The value can be None
                    if lang_value is not None:
                        # Is stored ?
                        if 'value' in lang_info:
                            xdoc.add_value(lang_info['value'],
                                           _encode(field_cls, lang_value))
                        # Is indexed ?
                        if 'prefix' in lang_info:
                            # Comment: Index twice (under the prefix of the
                            # field, and under the prefix of the language
                            # specific field)
                            _index(xdoc, field_cls, lang_value,
                                   info['prefix'], language)
                            _index(xdoc, field_cls, lang_value,
                                   lang_info['prefix'], language)
            # The value can be None
            elif value is not None:
                # Is stored ?
                if 'value' in info:
                    xdoc.add_value(info['value'], _encode(field_cls, value))
                # Is indexed ?
                if 'prefix' in info:
                    # By default language='en'
                    _index(xdoc, field_cls, value, info['prefix'], 'en')

        # Store the key field with the prefix 'Q'
        # Comment: the key field is indexed twice, but we must do it
        #          one => to index (as the others)
        #          two => to index without split
        #          the problem is that "_encode != _index"
        # (this is done after the loop since the key field may have been
        # registered by "_get_info" while reading this very document)
        key_field = self._key_field
        if (key_field is None or key_field not in doc_values or
                doc_values[key_field] is None):
            raise ValueError('the "key_field" value is compulsory')
        data = _reduce_size(_encode(fields[key_field],
                                    doc_values[key_field]))
        xdoc.add_term('Q' + data)

        # TODO: Don't store two documents with the same key field!

        # Save the doc
        db.add_document(xdoc)

        # Store metadata ?
        if metadata_modified:
            db.set_metadata('metadata', dumps(metadata))

    def unindex_document(self, value):
        """Remove the document that has value stored in its key_field.
           If the document does not exist => no error

        Nothing is done if the catalog has no key field yet.
        """
        key_field = self._key_field
        if key_field is not None:
            data = _reduce_size(_encode(self._fields[key_field], value))
            self._db.delete_document('Q' + data)

    #######################################################################
    # API / Public / Search
    #######################################################################
    def search(self, query=None, **kw):
        """Launch a search in the catalog.

        The query and keyword arguments are turned into a xapian query by
        "_get_xquery"; the result is a SearchResults object.
        """
        xquery = _get_xquery(self, query, **kw)
        return SearchResults(self, xquery)

    def get_unique_values(self, name):
        """Return all the terms of a given indexed field

        An unknown field gives an empty set. A KeyError is raised if the
        field is known but has no 'prefix' entry in the metadata.
        """
        metadata = self._metadata
        # If there is a problem => an empty result
        if name not in metadata:
            return set()

        # Ok
        prefix = metadata[name]['prefix']
        prefix_len = len(prefix)
        return set(t.term[prefix_len:] for t in self._db.allterms(prefix))

    #######################################################################
    # API / Private
    #######################################################################
    def _get_info(self, field_cls, name):
        """Allocate and return the metadata entry of a new field.

        The entry gets a 'value' slot number if the field class is stored
        and a 'prefix' if it is indexed. A key field is flagged with
        'key_field' and remembered by the catalog; ValueError is raised if
        there is already a key field, or if the key field is not both
        stored and indexed.
        """
        info = {}

        # The key field ?
        if getattr(field_cls, 'is_key_field', False):
            if self._key_field is not None:
                raise ValueError('You must have only one key field, '
                                 'not multiple, not multilingual')
            if not (field_cls.is_stored and field_cls.is_indexed):
                raise ValueError('the key field must be stored '
                                 'and indexed')
            self._key_field = name
            info['key_field'] = True
        # Stored ?
        if getattr(field_cls, 'is_stored', False):
            info['value'] = self._value_nb
            self._value_nb += 1
        # Indexed ?
        if getattr(field_cls, 'is_indexed', False):
            info['prefix'] = _get_prefix(self._prefix_nb)
            self._prefix_nb += 1

        return info

    def _load_all_internal(self):
        """Load the metadata from the database

        The key field is looked up again, and the value/prefix counters
        are reset to the number of entries that already own a 'value' / a
        'prefix'.
        """
        self._key_field = None
        self._value_nb = 0
        self._prefix_nb = 0

        metadata = self._db.get_metadata('metadata')
        if metadata == '':
            self._metadata = {}
        else:
            self._metadata = loads(metadata)
            for name, info in self._metadata.iteritems():
                if 'key_field' in info:
                    self._key_field = name
                if 'value' in info:
                    self._value_nb += 1
                if 'prefix' in info:
                    self._prefix_nb += 1

    def _query2xquery(self, query):
        """take a "itools" query and return a "xapian" query

        A query on a field unknown to the metadata gives an empty xapian
        query. Nothing is returned (None) for a query class that is not
        handled below.
        """
        query_class = type(query)
        fields = self._fields
        metadata = self._metadata

        # All Query
        if query_class is AllQuery:
            return Query('')

        # PhraseQuery, the field must be indexed
        if query_class is PhraseQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError("unexpected '%s'" % type(name))
            # If there is a problem => an empty result
            if name not in metadata:
                return Query()
            info = metadata[name]
            try:
                prefix = info['prefix']
            except KeyError:
                raise ValueError('the field "%s" must be indexed' % name)
            field_cls = _get_field_cls(name, fields, info)
            return _make_PhraseQuery(field_cls, query.value, prefix)

        # RangeQuery, the field must be stored
        if query_class is RangeQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError("unexpected '%s'" % type(name))
            # If there is a problem => an empty result
            if name not in metadata:
                return Query()

            info = metadata[name]
            try:
                value = info['value']
            except KeyError:
                # Same exception type as before, with a usable message
                raise KeyError('the field "%s" must be stored' % name)
            field_cls = _get_field_cls(name, fields, info)

            left = query.left
            right = query.right

            # Case 1: no limits, return everything
            if left is None and right is None:
                return Query('')

            # Case 2: left limit only
            if right is None:
                return Query(OP_VALUE_GE, value, _encode(field_cls, left))

            # Case 3: right limit only
            if left is None:
                return Query(OP_VALUE_LE, value, _encode(field_cls, right))

            # Case 4: left and right
            return Query(OP_VALUE_RANGE, value, _encode(field_cls, left),
                         _encode(field_cls, right))

        # StartQuery, the field must be stored
        if query_class is StartQuery:
            name = query.name
            if type(name) is not str:
                raise TypeError("unexpected '%s'" % type(name))
            # If there is a problem => an empty result
            if name not in metadata:
                return Query()

            info = metadata[name]
            try:
                value_nb = info['value']
            except KeyError:
                # Same exception type as before, with a usable message
                raise KeyError('the field "%s" must be stored' % name)
            field_cls = _get_field_cls(name, fields, info)

            value = query.value
            value = _encode(field_cls, value)

            if value:
                # good = {x / x >= value}
                good = Query(OP_VALUE_GE, value_nb, value)

                # Construct the variable end_value:
                # end_value = the word "after" value: toto => totp

                # Delete the '\xff' at the end of value
                end_value = value
                while end_value and ord(end_value[-1]) == 255:
                    end_value = end_value[:-1]

                # Normal case: end_value is not empty
                if end_value:
                    # The word after
                    end_value = (end_value[:-1] +
                                 chr(ord(end_value[-1]) + 1))

                    # bad = {x / x >= end_value}
                    bad = Query(OP_VALUE_GE, value_nb, end_value)

                    # Return {x / x in good but x not in bad}
                    return Query(OP_AND_NOT, good, bad)
                # If end_value is empty
                else:
                    # Return {x / x in good}
                    return good
            else:
                # If value == '', we return everything
                return Query('')

        # And
        i2x = self._query2xquery
        if query_class is AndQuery:
            return Query(OP_AND, [ i2x(q) for q in query.atoms ])

        # Or
        if query_class is OrQuery:
            return Query(OP_OR, [ i2x(q) for q in query.atoms ])

        # Not
        if query_class is NotQuery:
            return Query(OP_AND_NOT, Query(''), i2x(query.query))