def testSearchAllFields(self):
    # Exercise the ``search_all_fields`` option of ``Index.search``.
    #
    # Fixture: the word "quick" occurs in the *text* of document 1 and in
    # the *title* of document 2; document 3 does not contain it at all.
    o1 = Mock('en', text=u'The quick brown fox', title=u'Fox')
    o2 = Mock('en', text=u'Mary had a little lamb.', title=u'Quick Mary')
    o3 = Mock('en', text=u'Pop goes the weasel!', title=u'Weasel')

    I = Index(fields=('title', 'text'), languages=('en',))
    I.index_object(o1, 1)
    I.index_object(o2, 2)
    I.index_object(o3, 3)

    # A plain search is expected to report only document 2 (title match).
    # assertEqual is used because the ``assertEquals`` spelling is a
    # deprecated alias that newer unittest releases no longer provide.
    res = I.search(u'quick')
    self.assertEqual([2], list(res.getDocids()))

    # With search_all_fields=True the text match (document 1) shows up too.
    res = I.search(u'quick', search_all_fields=True)
    self.assertEqual([1, 2], list(res.getDocids()))

    # Naming an explicit field together with search_all_fields is rejected.
    self.assertRaises(
        ValueError, I.search, u'quick', field='text', search_all_fields=True)
def testSearchAllFields(self):
    # Verify how ``search_all_fields`` changes the result of a query.
    #
    # "quick" appears in the text of the first object and in the title of
    # the second one; the third object never mentions it.
    documents = (
        Mock('en', text=u'The quick brown fox', title=u'Fox'),
        Mock('en', text=u'Mary had a little lamb.', title=u'Quick Mary'),
        Mock('en', text=u'Pop goes the weasel!', title=u'Weasel'),
    )
    index = Index(fields=('title', 'text'), languages=('en', ))
    # Document ids are assigned 1, 2, 3 in fixture order.
    for docid, document in enumerate(documents, 1):
        index.index_object(document, docid)

    # Default query: only the title match (docid 2) is expected.
    # NOTE: ``assertEqual`` replaces the deprecated ``assertEquals`` alias,
    # which recent unittest versions have dropped.
    found = index.search(u'quick')
    self.assertEqual([2], list(found.getDocids()))

    # Searching every field additionally yields the text match (docid 1).
    found = index.search(u'quick', search_all_fields=True)
    self.assertEqual([1, 2], list(found.getDocids()))

    # ``field`` and ``search_all_fields`` together must raise ValueError.
    self.assertRaises(ValueError, index.search, u'quick',
                      field='text', search_all_fields=True)
class TextIndexNG3(SimpleItem, PropertyManager):
    # Catalog index object that keeps an ``Index`` instance in ``self.index``
    # and forwards indexing, unindexing and searching to it.
    #
    # NOTE(review): members that had no docstring are documented with
    # comments on purpose; the Zope 2 publisher is understood to treat a
    # docstring as a precondition for publishing an attribute via URL --
    # confirm before converting these comments into docstrings.

    implements(ITextIndexNG3, IPluggableIndex)

    meta_type = 'TextIndexNG3'
    default_encoding = 'iso-8859-15'     # I think we don't need this anymore
    management_page_charset = 'utf-8'    # needed for several ZMI methods

    # Index-specific management tabs come first, inherited tabs follow.
    manage_options = (
        (
            {'label': 'Index', 'action': 'manage_workspace'},
            {'label': 'Vocabulary', 'action': 'vocabularyform'},
            {'label': 'Test', 'action': 'queryform'},
            {'label': 'Converters', 'action': 'converters'},
            {'label': 'Thesaurus', 'action': 'thesaurus'},
            {'label': 'Adapters', 'action': 'adapters'},
        )
        + SimpleItem.manage_options
        + PropertyManager.manage_options
    )

    # Names handed to parseIndexRequest() when a catalog query is parsed.
    query_options = (
        'query', 'encoding', 'parser', 'language', 'field', 'autoexpand',
        'similarity_ratio', 'ranking', 'ranking_maxhits', 'thesaurus',
        'search_all_fields',
    )

    def __init__(self, id, extra, caller):
        # Build the wrapped Index from the settings found on ``extra``.
        # ``caller`` is accepted but not used here.
        self.id = id
        self.title = id

        # Without configured fields the index id itself is the only field.
        fields = get(extra, 'fields', []) or [id]

        settings = {
            'fields': fields,
            'lexicon': get(extra, 'lexicon', DEFAULT_LEXICON),
            'storage': get(extra, 'storage', DEFAULT_STORAGE),
            'splitter': get(extra, 'splitter', DEFAULT_SPLITTER),
            'autoexpand': get(extra, 'autoexpand', 'off'),
            'autoexpand_limit': get(extra, 'autoexpand_limit', 4),
            'query_parser': get(extra, 'query_parser', 'txng.parsers.en'),
            'use_stemmer': get(extra, 'use_stemmer', False),
            'languages': get(extra, 'languages', ('en',)),
            'use_stopwords': bool(get(extra, 'use_stopwords')),
            'default_encoding': get(extra, 'default_encoding',
                                    DEFAULT_ENCODING),
            'use_normalizer': bool(get(extra, 'use_normalizer')),
            'dedicated_storage': bool(get(extra, 'dedicated_storage')),
            'splitter_casefolding': bool(
                get(extra, 'splitter_casefolding', True)),
            'splitter_additional_chars': get(
                extra, 'splitter_additional_chars',
                DEFAULT_ADDITIONAL_CHARS),
            'index_unknown_languages': bool(
                get(extra, 'index_unknown_languages', True)),
            'ranking': bool(get(extra, 'ranking')),
            'ranking_method': get(extra, 'ranking_method', DEFAULT_RANKING),
        }
        self.index = Index(**settings)

    def clear(self):
        """ clear the index """
        self.index.clear()

    def index_object(self, docid, obj, threshold=None):
        # Note the swapped argument order expected by the wrapped Index.
        # ``threshold`` is accepted but ignored.
        return int(self.index.index_object(obj, docid))

    def unindex_object(self, docid):
        # Always reports 1 after delegating the removal.
        self.index.unindex_object(docid)
        return 1

    def getIndexSourceNames(self):
        """ return indexed fields """
        return self.index.fields

    def indexSize(self):
        # Size is measured as the number of entries in the lexicon.
        return len(self.index.getLexicon())

    def getEntryForObject(self, docid, default=None):
        """Return the repr() of the words indexed for 'docid'.

        With dedicated storages the result represents a mapping
        field -> list of indexed words; with shared storage it represents
        the word list of the first field only. 'default' is not used.
        """
        lexicon = self.index.getLexicon()
        words_by_field = {}
        for name in self.index.fields:
            try:
                word_ids = self.index.getStorage(name).getWordIdsForDocId(
                    docid)
            except StorageException:
                # The storage signalled a problem: report no words.
                word_ids = ()
            words_by_field[name] = [lexicon.getWord(w) for w in word_ids]

        if self.index.dedicated_storage:
            return repr(words_by_field)
        return repr(words_by_field[self.index.fields[0]])

    def _apply_index(self, request, cid=''):
        # Run the query found in ``request`` against the wrapped index.
        # Returns None when there is nothing to search for, otherwise a
        # ``(results, index id)`` pair. ``cid`` is not used.
        record = parseIndexRequest(request, self.getId(), self.query_options)
        if record.keys is None:
            return None

        # The query must be a unicode string; decode it when it is not.
        query = record.keys[0]
        if not isinstance(query, unicode):
            encoding = record.get('encoding', self.index.default_encoding)
            query = unicode(query, encoding, 'ignore')
        if not query:
            return None

        # Forward only the options that are actually set on the record.
        option_names = ('parser', 'language', 'field', 'autoexpand',
                        'similarity_ratio', 'thesaurus', 'ranking',
                        'ranking_maxhits', 'search_all_fields')
        candidates = ((k, getattr(record, k, marker)) for k in option_names)
        options = dict((k, v) for k, v in candidates if v is not marker)

        result = self.index.search(query, **options)
        ranked = result.getRankedResults()
        # Prefer ranked results when there are any, plain docids otherwise.
        hits = ranked if ranked else result.getDocids()
        return hits, self.id

    def __len__(self):
        return len(self.index)

    numObjects = __len__

    def manage_workspace(self, REQUEST):
        """ redirect to manage since we can not override manage_workspace
            through a Five browser view
        """
        from zope.component import getMultiAdapter
        manage_view = getMultiAdapter((self, REQUEST), name='manageform')
        return manage_view()
class TextIndexNG3(SimpleItem, PropertyManager):
    # Pluggable index that owns an ``Index`` object (``self.index``) and
    # delegates all indexing and query work to it.
    #
    # NOTE(review): ``_apply_index`` relies on the three-argument form of
    # ``str``, while the class-body ``implements()`` call below is, as far
    # as I know, not supported by zope.interface on Python 3 (the
    # ``@implementer`` decorator is the documented replacement) -- confirm
    # the target interpreter.
    # NOTE(review): undocumented members carry comments instead of new
    # docstrings because the Zope 2 publisher is understood to use the
    # docstring as a publishability marker -- confirm before changing.

    implements(ITextIndexNG3, IPluggableIndex)

    meta_type = 'TextIndexNG3'
    # I think we don't need this anymore
    default_encoding = 'iso-8859-15'
    # needed for several ZMI methods
    management_page_charset = 'utf-8'

    _own_manage_options = (
        {'label': 'Index', 'action': 'manage_workspace'},
        {'label': 'Vocabulary', 'action': 'vocabularyform'},
        {'label': 'Test', 'action': 'queryform'},
        {'label': 'Converters', 'action': 'converters'},
        {'label': 'Thesaurus', 'action': 'thesaurus'},
        {'label': 'Adapters', 'action': 'adapters'},
    )
    # Own tabs first, then the tabs inherited from both base classes.
    manage_options = (_own_manage_options
                      + SimpleItem.manage_options
                      + PropertyManager.manage_options)
    del _own_manage_options  # helper only; keep the class namespace as before

    # Option names recognised when a catalog request is parsed.
    query_options = ('query', 'encoding', 'parser', 'language', 'field',
                     'autoexpand', 'similarity_ratio', 'ranking',
                     'ranking_maxhits', 'thesaurus', 'search_all_fields')

    def __init__(self, id, extra, caller):
        # Configure the wrapped Index from ``extra``; ``caller`` is unused.
        self.id = id
        self.title = id

        # Fall back to the index id when no fields are configured.
        configured = get(extra, 'fields', [])
        fields = configured if configured else [id]

        self.index = Index(
            fields=fields,
            lexicon=get(extra, 'lexicon', DEFAULT_LEXICON),
            storage=get(extra, 'storage', DEFAULT_STORAGE),
            splitter=get(extra, 'splitter', DEFAULT_SPLITTER),
            autoexpand=get(extra, 'autoexpand', 'off'),
            autoexpand_limit=get(extra, 'autoexpand_limit', 4),
            query_parser=get(extra, 'query_parser', 'txng.parsers.en'),
            use_stemmer=get(extra, 'use_stemmer', False),
            languages=get(extra, 'languages', ('en', )),
            use_stopwords=bool(get(extra, 'use_stopwords')),
            default_encoding=get(extra, 'default_encoding',
                                 DEFAULT_ENCODING),
            use_normalizer=bool(get(extra, 'use_normalizer')),
            dedicated_storage=bool(get(extra, 'dedicated_storage')),
            splitter_casefolding=bool(
                get(extra, 'splitter_casefolding', True)),
            splitter_additional_chars=get(
                extra, 'splitter_additional_chars',
                DEFAULT_ADDITIONAL_CHARS),
            index_unknown_languages=bool(
                get(extra, 'index_unknown_languages', True)),
            ranking=bool(get(extra, 'ranking')),
            ranking_method=get(extra, 'ranking_method', DEFAULT_RANKING),
        )

    def clear(self):
        """ clear the index """
        self.index.clear()

    def index_object(self, docid, obj, threshold=None):
        # The wrapped Index takes (obj, docid); ``threshold`` is ignored.
        outcome = self.index.index_object(obj, docid)
        return int(outcome)

    def unindex_object(self, docid):
        # Delegates the removal and unconditionally returns 1.
        self.index.unindex_object(docid)
        return 1

    def getIndexSourceNames(self):
        """ return indexed fields """
        return self.index.fields

    def getIndexQueryNames(self):
        """ Return queryable parameters """
        return [self.id]

    def indexSize(self):
        # Number of entries held by the lexicon.
        lexicon = self.index.getLexicon()
        return len(lexicon)

    def getEntryForObject(self, docid, default=None):
        """Describe what is indexed for 'docid' as a string.

        For dedicated storages this is the repr() of a mapping
        field -> list of indexed words; for shared storage it is the
        repr() of the first field's word list. 'default' is unused.
        """
        index = self.index
        getWord = index.getLexicon().getWord

        def words_for(field):
            # Words stored for ``docid`` in one field; empty on storage error.
            try:
                wids = index.getStorage(field).getWordIdsForDocId(docid)
            except StorageException:
                return []
            return [getWord(wid) for wid in wids]

        entry = {field: words_for(field) for field in index.fields}
        if not index.dedicated_storage:
            return repr(entry[index.fields[0]])
        return repr(entry)

    def _apply_index(self, request, cid=''):
        # Evaluate the catalog ``request`` against the wrapped index.
        # Yields None when no usable query is present, otherwise a tuple
        # ``(results, index id)``. ``cid`` is unused.

        # parse the query options
        record = parseIndexRequest(request, self.getId(), self.query_options)
        if record.keys is None:
            return None

        # prepare query (must be a text string)
        query = record.keys[0]
        if not isinstance(query, str):
            query = str(
                query,
                record.get('encoding', self.index.default_encoding),
                'ignore')
        if not query:
            return None

        # Collect the search options that are present on the record.
        options = {}
        for name in ('parser', 'language', 'field', 'autoexpand',
                     'similarity_ratio', 'thesaurus', 'ranking',
                     'ranking_maxhits', 'search_all_fields'):
            value = getattr(record, name, marker)
            if value is marker:
                continue
            options[name] = value

        result = self.index.search(query, **options)
        ranked = result.getRankedResults()
        if not ranked:
            # No ranked results available: fall back to the plain docids.
            return result.getDocids(), self.id
        return ranked, self.id

    def __len__(self):
        return len(self.index)

    numObjects = __len__

    def manage_workspace(self, REQUEST):
        """ redirect to manage since we can not override manage_workspace
            through a Five browser view
        """
        from zope.component import getMultiAdapter
        return getMultiAdapter((self, REQUEST), name='manageform')()
class TingIndex(zope.catalog.text.TextIndex, persistent.Persistent):
    # Catalog text index that stores its data in an ``Index`` object kept
    # in ``self._index`` and remembers the configuration it was built with.

    zope.interface.implements(
        zope.index.interfaces.IInjection,
        zope.index.interfaces.IStatistics,
        zope.index.interfaces.IIndexSearch,
        ITingIndex)

    def __init__(self,
                 field_name=None,
                 interface=None,
                 field_callable=False,
                 use_stemmer=defaults['use_stemmer'],
                 dedicated_storage=defaults['dedicated_storage'],
                 ranking=defaults['ranking'],
                 use_normalizer=defaults['use_normalizer'],
                 languages=defaults['languages'],
                 use_stopwords=defaults['use_stopwords'],
                 autoexpand_limit=defaults['autoexpand_limit'],
                 splitter=defaults['splitter'],
                 index_unknown_languages=defaults['index_unknown_languages'],
                 query_parser=defaults['query_parser'],
                 lexicon=defaults['lexicon'],
                 splitter_additional_chars=defaults['splitter_add_chars'],
                 storage=defaults['storage'],
                 splitter_casefolding=defaults['splitter_casefolding'],
                 asIFSet=True):
        # ``field_name`` is either a space-separated string of field names
        # or an already split sequence; ``languages`` is a space-separated
        # string. Raises ValueError when ranking is requested for a storage
        # that does not provide IStorageWithTermFrequency.
        if ranking:
            candidate = createObject(storage)
            if not IStorageWithTermFrequency.providedBy(candidate):
                raise ValueError("This storage cannot be used for ranking")

        _fields = field_name
        if isinstance(field_name, basestring):
            _fields = field_name.split(' ')

        # Only the first field is registered with the attribute index base.
        zope.catalog.attribute.AttributeIndex.__init__(
            self, _fields[0], interface, field_callable)

        # Dedicated storage is switched off for fewer than two fields.
        if len(_fields) < 2:
            dedicated_storage = False

        # Settings that go to the Index *and* are mirrored on the instance
        # under the same names.
        shared_settings = dict(
            use_stemmer=use_stemmer,
            dedicated_storage=dedicated_storage,
            ranking=ranking,
            use_normalizer=use_normalizer,
            use_stopwords=use_stopwords,
            storage=storage,
            autoexpand_limit=autoexpand_limit,
            splitter=splitter,
            lexicon=lexicon,
            index_unknown_languages=index_unknown_languages,
            query_parser=query_parser,
            splitter_additional_chars=splitter_additional_chars,
            splitter_casefolding=splitter_casefolding,
        )
        self._index = Index(
            fields=_fields,
            languages=languages.split(' '),
            **shared_settings)

        for attribute, value in shared_settings.items():
            setattr(self, attribute, value)
        # ``languages`` is kept in its original, unsplit form.
        self.languages = languages
        self.interface = interface
        self.default_fields = [_fields[0]]
        self._fields = _fields
        self._asIFSet = asIFSet

    def clear(self):
        # Drop everything held by the wrapped index.
        self._index.clear()

    def documentCount(self):
        """See interface IStatistics
        """
        first_field = self.default_fields[0]
        return len(self._index.getStorage(first_field))

    def wordCount(self):
        """See interface IStatistics
        """
        return len(self._index.getLexicon())

    def index_doc(self, docid, value):
        """See interface IInjection
        """
        # None values are skipped.
        if value is None:
            return
        self._index.index_object(value, docid)

    def unindex_doc(self, docid):
        """See interface IInjection
        """
        self._index.unindex_object(docid)

    def apply(self, query):
        # ``query`` is either the query itself or a dict whose 'query' item
        # is the query and whose remaining items become search options.
        options = {}
        if isinstance(query, dict):
            # Work on a copy so the caller's dict is left untouched.
            options.update(query)
            query = options.pop('query')

        docids = self._index.search(query, **options).getDocids()
        return BTrees.IFBTree.IFSet(docids) if self._asIFSet else docids
class TingIndex(zope.catalog.text.TextIndex, persistent.Persistent):
    # Text index for the catalog: wraps an ``Index`` instance (``_index``)
    # and keeps a copy of every constructor setting as an attribute.

    zope.interface.implements(zope.index.interfaces.IInjection,
                              zope.index.interfaces.IStatistics,
                              zope.index.interfaces.IIndexSearch,
                              ITingIndex)

    def __init__(self, field_name=None, interface=None, field_callable=False,
                 use_stemmer=defaults['use_stemmer'],
                 dedicated_storage=defaults['dedicated_storage'],
                 ranking=defaults['ranking'],
                 use_normalizer=defaults['use_normalizer'],
                 languages=defaults['languages'],
                 use_stopwords=defaults['use_stopwords'],
                 autoexpand_limit=defaults['autoexpand_limit'],
                 splitter=defaults['splitter'],
                 index_unknown_languages=defaults['index_unknown_languages'],
                 query_parser=defaults['query_parser'],
                 lexicon=defaults['lexicon'],
                 splitter_additional_chars=defaults['splitter_add_chars'],
                 storage=defaults['storage'],
                 splitter_casefolding=defaults['splitter_casefolding'],
                 asIFSet=True):
        # Ranking needs a storage providing IStorageWithTermFrequency;
        # anything else is rejected with ValueError before any state is set.
        if ranking and not IStorageWithTermFrequency.providedBy(
                createObject(storage)):
            raise ValueError("This storage cannot be used for ranking")

        # A string is split on single spaces; other values are used as given.
        _fields = (field_name.split(' ')
                   if isinstance(field_name, basestring) else field_name)
        primary_field = _fields[0]

        zope.catalog.attribute.AttributeIndex.__init__(
            self, primary_field, interface, field_callable)

        # With fewer than two fields dedicated storage is forced off.
        dedicated_storage = dedicated_storage and not len(_fields) < 2 \
            if len(_fields) >= 2 else False

        self._index = Index(
            fields=_fields,
            languages=languages.split(' '),
            use_stemmer=use_stemmer,
            dedicated_storage=dedicated_storage,
            ranking=ranking,
            use_normalizer=use_normalizer,
            use_stopwords=use_stopwords,
            storage=storage,
            autoexpand_limit=autoexpand_limit,
            splitter=splitter,
            lexicon=lexicon,
            index_unknown_languages=index_unknown_languages,
            query_parser=query_parser,
            splitter_additional_chars=splitter_additional_chars,
            splitter_casefolding=splitter_casefolding)

        # Field bookkeeping.
        self._fields = _fields
        self.default_fields = [primary_field]
        self.interface = interface

        # Language processing settings (``languages`` stays unsplit).
        self.languages = languages
        self.use_stemmer = use_stemmer
        self.use_normalizer = use_normalizer
        self.use_stopwords = use_stopwords
        self.index_unknown_languages = index_unknown_languages

        # Splitter, lexicon and parser settings.
        self.splitter = splitter
        self.splitter_additional_chars = splitter_additional_chars
        self.splitter_casefolding = splitter_casefolding
        self.lexicon = lexicon
        self.query_parser = query_parser
        self.autoexpand_limit = autoexpand_limit

        # Storage and result settings.
        self.storage = storage
        self.dedicated_storage = dedicated_storage
        self.ranking = ranking
        self._asIFSet = asIFSet

    def clear(self):
        # Forward to the wrapped index.
        self._index.clear()

    def documentCount(self):
        """See interface IStatistics
        """
        storage = self._index.getStorage(self.default_fields[0])
        return len(storage)

    def wordCount(self):
        """See interface IStatistics
        """
        lexicon = self._index.getLexicon()
        return len(lexicon)

    def index_doc(self, docid, value):
        """See interface IInjection
        """
        # Nothing is indexed for a None value.
        if value is not None:
            self._index.index_object(value, docid)

    def unindex_doc(self, docid):
        """See interface IInjection
        """
        self._index.unindex_object(docid)

    def apply(self, query):
        # Accepts a bare query or a dict carrying the query under 'query'
        # plus extra keyword options for the search call.
        search_kw = {}
        if isinstance(query, dict):
            # Copy first so the caller's mapping is not modified.
            search_kw = dict(query)
            query = search_kw.pop('query')

        found = self._index.search(query, **search_kw)
        docids = found.getDocids()
        if not self._asIFSet:
            return docids
        return BTrees.IFBTree.IFSet(docids)