def testSearchAllFields(self):
    """Compare a default search with ``search_all_fields=True``.

    Three objects are indexed on the fields 'title' and 'text'.  The word
    'quick' occurs in the title of docid 2 and only in the text of docid 1.
    The default search is expected to return docid 2 only, the all-fields
    search docids 1 and 2, and combining ``field`` with
    ``search_all_fields=True`` is expected to raise ValueError.
    """
    o1 = Mock('en', text=u'The quick brown fox', title=u'Fox')
    o2 = Mock('en', text=u'Mary had a little lamb.', title=u'Quick Mary')
    o3 = Mock('en', text=u'Pop goes the weasel!', title=u'Weasel')

    index = Index(fields=('title', 'text'), languages=('en',))
    for docid, obj in enumerate((o1, o2, o3), 1):
        index.index_object(obj, docid)

    # assertEqual is used instead of the deprecated assertEquals alias.
    res = index.search(u'quick')
    self.assertEqual([2], list(res.getDocids()))

    res = index.search(u'quick', search_all_fields=True)
    self.assertEqual([1, 2], list(res.getDocids()))

    # An explicit field cannot be combined with search_all_fields.
    self.assertRaises(
        ValueError,
        index.search, u'quick', field='text', search_all_fields=True)
示例#2
0
 def testSearchAllFields(self):
     """Check that ``search_all_fields=True`` widens the search.

     'quick' appears in the title of docid 2 and only in the text of
     docid 1.  The plain search is expected to find docid 2 only, the
     all-fields search docids 1 and 2.  Passing ``field`` together with
     ``search_all_fields=True`` is expected to raise ValueError.
     """
     o1 = Mock('en', text=u'The quick brown fox', title=u'Fox')
     o2 = Mock('en', text=u'Mary had a little lamb.', title=u'Quick Mary')
     o3 = Mock('en', text=u'Pop goes the weasel!', title=u'Weasel')
     I = Index(fields=('title', 'text'), languages=('en', ))
     I.index_object(o1, 1)
     I.index_object(o2, 2)
     I.index_object(o3, 3)

     # assertEqual replaces the deprecated assertEquals alias.
     res = I.search(u'quick')
     self.assertEqual([2], list(res.getDocids()))
     res = I.search(u'quick', search_all_fields=True)
     self.assertEqual([1, 2], list(res.getDocids()))

     # field and search_all_fields must not be given together.
     self.assertRaises(ValueError,
                       I.search,
                       u'quick',
                       field='text',
                       search_all_fields=True)
class TextIndexNG3(SimpleItem, PropertyManager):
    # Pluggable text index: a thin wrapper that keeps an ``Index`` instance
    # in ``self.index`` and delegates indexing, unindexing and searching
    # to it.
    #
    # NOTE(review): methods that had no docstring are documented with
    # comments on purpose.  Presumably this class is published by Zope,
    # where adding a docstring may make a method reachable through the web
    # -- confirm before turning these comments into docstrings.

    implements(ITextIndexNG3, IPluggableIndex)

    meta_type = 'TextIndexNG3'
    default_encoding = 'iso-8859-15'    # I think we don't need this anymore
    management_page_charset = 'utf-8'   # needed for several ZMI methods
    manage_options = (
        ({'label': 'Index', 'action': 'manage_workspace'},
         {'label': 'Vocabulary', 'action': 'vocabularyform'},
         {'label': 'Test', 'action': 'queryform'},
         {'label': 'Converters', 'action': 'converters'},
         {'label': 'Thesaurus', 'action': 'thesaurus'},
         {'label': 'Adapters', 'action': 'adapters'},
         ) +
        SimpleItem.manage_options +
        PropertyManager.manage_options
    )

    # Option names handed to parseIndexRequest() in _apply_index().
    query_options = ('query', 'encoding', 'parser', 'language', 'field',
                     'autoexpand', 'similarity_ratio',
                     'ranking', 'ranking_maxhits', 'thesaurus',
                     'search_all_fields')

    def __init__(self, id, extra, caller):
        # id     -- index id; also used as title and as the default field
        # extra  -- configuration source, read via get(extra, name, default)
        # caller -- not used by this constructor
        self.id = id
        self.title = id

        # Fall back to the index id when ``extra`` names no fields.
        fields = get(extra, 'fields', []) or [id]

        self.index = Index(
            fields=fields,
            lexicon=get(extra, 'lexicon', DEFAULT_LEXICON),
            storage=get(extra, 'storage', DEFAULT_STORAGE),
            splitter=get(extra, 'splitter', DEFAULT_SPLITTER),
            autoexpand=get(extra, 'autoexpand', 'off'),
            autoexpand_limit=get(extra, 'autoexpand_limit', 4),
            query_parser=get(extra, 'query_parser', 'txng.parsers.en'),
            use_stemmer=get(extra, 'use_stemmer', False),
            languages=get(extra, 'languages', ('en',)),
            use_stopwords=bool(get(extra, 'use_stopwords')),
            default_encoding=get(extra, 'default_encoding',
                                 DEFAULT_ENCODING),
            use_normalizer=bool(get(extra, 'use_normalizer')),
            dedicated_storage=bool(get(extra, 'dedicated_storage')),
            splitter_casefolding=bool(
                get(extra, 'splitter_casefolding', True)),
            splitter_additional_chars=get(extra, 'splitter_additional_chars',
                                          DEFAULT_ADDITIONAL_CHARS),
            index_unknown_languages=bool(
                get(extra, 'index_unknown_languages', True)),
            ranking=bool(get(extra, 'ranking')),
            ranking_method=get(extra, 'ranking_method', DEFAULT_RANKING),
        )

    def clear(self):
        """ clear the index """
        self.index.clear()

    def index_object(self, docid, obj, threshold=None):
        # Note the swapped argument order: the wrapped index takes
        # (obj, docid).  ``threshold`` is accepted but not used.
        result = self.index.index_object(obj, docid)
        return int(result)

    def unindex_object(self, docid):
        # Always reports success (1) once the wrapped index has returned.
        self.index.unindex_object(docid)
        return 1

    def getIndexSourceNames(self):
        """ return indexed fields """
        return self.index.fields

    def indexSize(self):
        # Size is measured as the number of entries in the lexicon.
        return len(self.index.getLexicon())

    def getEntryForObject(self, docid, default=None):
        """Get all information contained for 'docid'.

        Returns a string representing a mapping field -> list of indexed words
        for dedicated storages or a list of indexed words for shared storage.
        A field whose storage raises StorageException contributes an empty
        list.  The 'default' argument is accepted but not used.
        """
        getWord = self.index.getLexicon().getWord
        d = {}
        for field in self.index.fields:
            try:
                wids = self.index.getStorage(field).getWordIdsForDocId(docid)
            except StorageException:
                wids = ()
            d[field] = [getWord(wid) for wid in wids]
        if not self.index.dedicated_storage:
            # Only the entry of the first field is reported in this case.
            return repr(d[self.index.fields[0]])
        return repr(d)

    def _apply_index(self, request, cid=''):
        # Run the query found in ``request`` against the wrapped index.
        # Returns None when there is no usable query, otherwise the tuple
        # (result, self.id) where result is the ranked result set if it is
        # non-empty and the plain docids otherwise.  ``cid`` is not used.

        # parse the query options
        record = parseIndexRequest(request, self.getId(), self.query_options)
        # No keys at all (None) or an empty sequence: nothing to search for.
        # Testing truthiness also avoids an IndexError on keys[0] below.
        if not record.keys:
            return None

        # prepare query (must be unicode string)
        query = record.keys[0]
        if not isinstance(query, unicode):
            encoding = record.get('encoding', self.index.default_encoding)
            query = unicode(query, encoding, 'ignore')
        if not query:
            return None

        # options: forward only those that are actually set on the record
        options = {}
        for k in ('parser', 'language', 'field', 'autoexpand',
                  'similarity_ratio', 'thesaurus', 'ranking',
                  'ranking_maxhits', 'search_all_fields'):
            v = getattr(record, k, marker)
            if v is not marker:
                options[k] = v

        result = self.index.search(query, **options)
        ranked_resultset = result.getRankedResults()
        if ranked_resultset:
            return ranked_resultset, self.id
        return result.getDocids(), self.id

    def __len__(self):
        return len(self.index)
    numObjects = __len__

    def manage_workspace(self, REQUEST):
        """ redirect to manage since we can not override manage_workspace
            through a Five browser view
        """
        from zope.component import getMultiAdapter
        view = getMultiAdapter((self, REQUEST), name='manageform')
        return view()
class TextIndexNG3(SimpleItem, PropertyManager):
    # Pluggable text index that wraps an ``Index`` instance (``self.index``)
    # and forwards indexing, unindexing and searching to it.
    #
    # NOTE(review): methods that had no docstring are documented with
    # comments on purpose.  Presumably this class is published by Zope,
    # where adding a docstring may make a method reachable through the web
    # -- confirm before turning these comments into docstrings.
    #
    # NOTE(review): this variant targets Python 3 (see the three-argument
    # str() call in _apply_index).  The class-advice form ``implements()``
    # of zope.interface is, as far as documented, not usable on Python 3 --
    # confirm and switch to the ``@implementer`` class decorator together
    # with the matching import at the top of the file.

    implements(ITextIndexNG3, IPluggableIndex)

    meta_type = 'TextIndexNG3'
    default_encoding = 'iso-8859-15'  # I think we don't need this anymore
    management_page_charset = 'utf-8'  # needed for several ZMI methods
    manage_options = (
        ({'label': 'Index', 'action': 'manage_workspace'},
         {'label': 'Vocabulary', 'action': 'vocabularyform'},
         {'label': 'Test', 'action': 'queryform'},
         {'label': 'Converters', 'action': 'converters'},
         {'label': 'Thesaurus', 'action': 'thesaurus'},
         {'label': 'Adapters', 'action': 'adapters'},
         ) +
        SimpleItem.manage_options +
        PropertyManager.manage_options
    )

    # Option names handed to parseIndexRequest() in _apply_index().
    query_options = ('query', 'encoding', 'parser', 'language', 'field',
                     'autoexpand', 'similarity_ratio', 'ranking',
                     'ranking_maxhits', 'thesaurus', 'search_all_fields')

    def __init__(self, id, extra, caller):
        # id     -- index id; also used as title and as the default field
        # extra  -- configuration source, read via get(extra, name, default)
        # caller -- not used by this constructor
        self.id = id
        self.title = id

        # Use the configured fields, or the index id when none are given.
        fields = get(extra, 'fields', []) or [id]

        self.index = Index(
            fields=fields,
            lexicon=get(extra, 'lexicon', DEFAULT_LEXICON),
            storage=get(extra, 'storage', DEFAULT_STORAGE),
            splitter=get(extra, 'splitter', DEFAULT_SPLITTER),
            autoexpand=get(extra, 'autoexpand', 'off'),
            autoexpand_limit=get(extra, 'autoexpand_limit', 4),
            query_parser=get(extra, 'query_parser', 'txng.parsers.en'),
            use_stemmer=get(extra, 'use_stemmer', False),
            languages=get(extra, 'languages', ('en', )),
            use_stopwords=bool(get(extra, 'use_stopwords')),
            default_encoding=get(extra, 'default_encoding', DEFAULT_ENCODING),
            use_normalizer=bool(get(extra, 'use_normalizer')),
            dedicated_storage=bool(get(extra, 'dedicated_storage')),
            splitter_casefolding=bool(get(extra, 'splitter_casefolding',
                                          True)),
            splitter_additional_chars=get(extra, 'splitter_additional_chars',
                                          DEFAULT_ADDITIONAL_CHARS),
            index_unknown_languages=bool(
                get(extra, 'index_unknown_languages', True)),
            ranking=bool(get(extra, 'ranking')),
            ranking_method=get(extra, 'ranking_method', DEFAULT_RANKING),
        )

    def clear(self):
        """ clear the index """
        self.index.clear()

    def index_object(self, docid, obj, threshold=None):
        # The wrapped index expects (obj, docid), i.e. the reverse order.
        # ``threshold`` is accepted but not used.
        result = self.index.index_object(obj, docid)
        return int(result)

    def unindex_object(self, docid):
        # Always reports success (1) once the wrapped index has returned.
        self.index.unindex_object(docid)
        return 1

    def getIndexSourceNames(self):
        """ return indexed fields """
        return self.index.fields

    def getIndexQueryNames(self):
        """ Return queryable parameters """
        return [self.id]

    def indexSize(self):
        # Size is measured as the number of entries in the lexicon.
        return len(self.index.getLexicon())

    def getEntryForObject(self, docid, default=None):
        """Get all information contained for 'docid'.

        Returns a string representing a mapping field -> list of indexed words
        for dedicated storages or a list of indexed words for shared storage.
        A field whose storage raises StorageException contributes an empty
        list.  The 'default' argument is accepted but not used.
        """
        getWord = self.index.getLexicon().getWord
        d = {}
        for field in self.index.fields:
            try:
                wids = self.index.getStorage(field).getWordIdsForDocId(docid)
            except StorageException:
                wids = ()
            d[field] = [getWord(wid) for wid in wids]
        if not self.index.dedicated_storage:
            # Only the entry of the first field is reported in this case.
            return repr(d[self.index.fields[0]])
        return repr(d)

    def _apply_index(self, request, cid=''):
        # Run the query found in ``request`` against the wrapped index.
        # Returns None when there is no usable query, otherwise the tuple
        # (result, self.id) where result is the ranked result set if it is
        # non-empty and the plain docids otherwise.  ``cid`` is not used.

        # parse the query options
        record = parseIndexRequest(request, self.getId(), self.query_options)
        # No keys at all (None) or an empty sequence: nothing to search for.
        # Testing truthiness also avoids an IndexError on keys[0] below.
        if not record.keys:
            return None

        # prepare query (must be a text string); other values are decoded
        # with the requested encoding, dropping undecodable input
        query = record.keys[0]
        if not isinstance(query, str):
            encoding = record.get('encoding', self.index.default_encoding)
            query = str(query, encoding, 'ignore')
        if not query:
            return None

        # options: forward only those that are actually set on the record
        options = {}
        for k in ('parser', 'language', 'field', 'autoexpand',
                  'similarity_ratio', 'thesaurus', 'ranking',
                  'ranking_maxhits', 'search_all_fields'):
            v = getattr(record, k, marker)
            if v is not marker:
                options[k] = v

        result = self.index.search(query, **options)
        ranked_resultset = result.getRankedResults()
        if ranked_resultset:
            return ranked_resultset, self.id
        return result.getDocids(), self.id

    def __len__(self):
        return len(self.index)

    numObjects = __len__

    def manage_workspace(self, REQUEST):
        """ redirect to manage since we can not override manage_workspace
            through a Five browser view
        """
        from zope.component import getMultiAdapter
        view = getMultiAdapter((self, REQUEST), name='manageform')
        return view()
示例#5
0
class TingIndex(zope.catalog.text.TextIndex,
                persistent.Persistent):
    """Catalog text index that delegates to an ``Index`` instance.

    The wrapped index lives in ``self._index``; the configuration values
    passed to the constructor are additionally kept as attributes.
    """

    zope.interface.implements(
        zope.index.interfaces.IInjection,
        zope.index.interfaces.IStatistics,
        zope.index.interfaces.IIndexSearch,
        ITingIndex)

    def __init__(self,
                 field_name=None,
                 interface=None,
                 field_callable=False,
                 use_stemmer=defaults['use_stemmer'],
                 dedicated_storage=defaults['dedicated_storage'],
                 ranking=defaults['ranking'],
                 use_normalizer=defaults['use_normalizer'],
                 languages=defaults['languages'],
                 use_stopwords=defaults['use_stopwords'],
                 autoexpand_limit=defaults['autoexpand_limit'],
                 splitter=defaults['splitter'],
                 index_unknown_languages=defaults['index_unknown_languages'],
                 query_parser=defaults['query_parser'],
                 lexicon=defaults['lexicon'],
                 splitter_additional_chars=defaults['splitter_add_chars'],
                 storage=defaults['storage'],
                 splitter_casefolding=defaults['splitter_casefolding'],
                 asIFSet=True):
        """Create the wrapped ``Index`` and remember the configuration.

        field_name -- whitespace-separated string or sequence of field
            names; the first name is handed to ``AttributeIndex.__init__``
            together with ``interface`` and ``field_callable``.
        languages -- whitespace-separated string or sequence of languages.
        dedicated_storage -- forced to False when fewer than two fields
            are given.
        asIFSet -- when true, ``apply`` wraps its result in an IFSet.

        All remaining options are passed through to ``Index`` unchanged.

        Raises ValueError when ``ranking`` is requested but the object
        created from ``storage`` does not provide
        IStorageWithTermFrequency, or when no field name is given.
        """
        if ranking:
            util = createObject(storage)
            if not IStorageWithTermFrequency.providedBy(util):
                raise ValueError("This storage cannot be used for ranking")

        # split() without argument ignores repeated, leading and trailing
        # blanks, which would otherwise produce empty field names.
        if isinstance(field_name, basestring):
            _fields = field_name.split()
        else:
            _fields = field_name
        if not _fields:
            # Covers None (the default), '' and empty sequences, which
            # previously failed on _fields[0] or yielded an empty name.
            raise ValueError("field_name must name at least one field")

        if isinstance(languages, basestring):
            _languages = languages.split()
        else:
            _languages = list(languages)

        zope.catalog.attribute.AttributeIndex.__init__(
            self, _fields[0], interface, field_callable)
        if len(_fields) < 2:
            dedicated_storage = False
        _default_fields = [_fields[0]]
        self._index = Index(
            fields=_fields,
            languages=_languages,
            use_stemmer=use_stemmer,
            dedicated_storage=dedicated_storage,
            ranking=ranking,
            use_normalizer=use_normalizer,
            use_stopwords=use_stopwords,
            storage=storage,
            autoexpand_limit=autoexpand_limit,
            splitter=splitter,
            lexicon=lexicon,
            index_unknown_languages=index_unknown_languages,
            query_parser=query_parser,
            splitter_additional_chars=splitter_additional_chars,
            splitter_casefolding=splitter_casefolding
        )
        # ``languages`` is stored exactly as it was passed in.
        self.languages = languages
        self.use_stemmer = use_stemmer
        self.dedicated_storage = dedicated_storage
        self.ranking = ranking
        self.use_normalizer = use_normalizer
        self.use_stopwords = use_stopwords
        self.interface = interface
        self.storage = storage
        self.autoexpand_limit = autoexpand_limit
        self.default_fields = _default_fields
        self._fields = _fields
        self.splitter = splitter
        self.lexicon = lexicon
        self.index_unknown_languages = index_unknown_languages
        self.query_parser = query_parser
        self.splitter_additional_chars = splitter_additional_chars
        self.splitter_casefolding = splitter_casefolding
        self._asIFSet = asIFSet

    def clear(self):
        """Clear the wrapped index."""
        self._index.clear()

    def documentCount(self):
        """See interface IStatistics

        Counted as the length of the storage of the first field.
        """
        return len(self._index.getStorage(self.default_fields[0]))

    def wordCount(self):
        """See interface IStatistics

        Counted as the length of the lexicon.
        """
        return len(self._index.getLexicon())

    def index_doc(self, docid, value):
        """See interface IInjection

        A value of None is silently skipped.
        """
        if value is not None:
            self._index.index_object(value, docid)

    def unindex_doc(self, docid):
        """See interface IInjection
        """
        self._index.unindex_object(docid)

    def apply(self, query):
        """Search the wrapped index and return the matching docids.

        ``query`` is either the query itself or a dict whose 'query' entry
        holds it; the other dict entries are passed to ``Index.search`` as
        keyword arguments (a dict without 'query' raises KeyError).  The
        docids are wrapped in an IFSet unless ``asIFSet`` was false.
        """
        kw = {}
        if isinstance(query, dict):
            # Work on a copy so the caller's dict is left untouched.
            kw.update(query)
            query = kw.pop('query')
        res = self._index.search(query, **kw).getDocids()
        if self._asIFSet:
            return BTrees.IFBTree.IFSet(res)
        return res
示例#6
0
class TingIndex(zope.catalog.text.TextIndex, persistent.Persistent):
    """Text index for a catalog, backed by an ``Index`` instance.

    Indexing, unindexing, statistics and searching are forwarded to
    ``self._index``; the constructor arguments are also kept as attributes.
    """

    zope.interface.implements(zope.index.interfaces.IInjection,
                              zope.index.interfaces.IStatistics,
                              zope.index.interfaces.IIndexSearch, ITingIndex)

    def __init__(self,
                 field_name=None,
                 interface=None,
                 field_callable=False,
                 use_stemmer=defaults['use_stemmer'],
                 dedicated_storage=defaults['dedicated_storage'],
                 ranking=defaults['ranking'],
                 use_normalizer=defaults['use_normalizer'],
                 languages=defaults['languages'],
                 use_stopwords=defaults['use_stopwords'],
                 autoexpand_limit=defaults['autoexpand_limit'],
                 splitter=defaults['splitter'],
                 index_unknown_languages=defaults['index_unknown_languages'],
                 query_parser=defaults['query_parser'],
                 lexicon=defaults['lexicon'],
                 splitter_additional_chars=defaults['splitter_add_chars'],
                 storage=defaults['storage'],
                 splitter_casefolding=defaults['splitter_casefolding'],
                 asIFSet=True):
        """Set up the wrapped ``Index`` and store the configuration.

        field_name -- field names as a whitespace-separated string or as a
            sequence; the first one is passed to ``AttributeIndex.__init__``
            along with ``interface`` and ``field_callable``.
        languages -- languages as a whitespace-separated string or as a
            sequence.
        dedicated_storage -- reset to False if fewer than two fields are
            given.
        asIFSet -- if true, ``apply`` returns an IFSet.

        Every other option is forwarded to ``Index`` as is.

        Raises ValueError if ``ranking`` is set and the object created from
        ``storage`` does not provide IStorageWithTermFrequency, or if no
        field name is given.
        """
        if ranking:
            util = createObject(storage)
            if not IStorageWithTermFrequency.providedBy(util):
                raise ValueError("This storage cannot be used for ranking")

        # Splitting on any whitespace avoids empty field names caused by
        # repeated, leading or trailing blanks.
        if isinstance(field_name, basestring):
            _fields = field_name.split()
        else:
            _fields = field_name
        if not _fields:
            # None (the default), '' and empty sequences used to fail on
            # _fields[0] or to register an empty field name.
            raise ValueError("field_name must name at least one field")

        if isinstance(languages, basestring):
            _languages = languages.split()
        else:
            _languages = list(languages)

        zope.catalog.attribute.AttributeIndex.__init__(self, _fields[0],
                                                       interface,
                                                       field_callable)
        if len(_fields) < 2:
            dedicated_storage = False
        _default_fields = [_fields[0]]
        self._index = Index(
            fields=_fields,
            languages=_languages,
            use_stemmer=use_stemmer,
            dedicated_storage=dedicated_storage,
            ranking=ranking,
            use_normalizer=use_normalizer,
            use_stopwords=use_stopwords,
            storage=storage,
            autoexpand_limit=autoexpand_limit,
            splitter=splitter,
            lexicon=lexicon,
            index_unknown_languages=index_unknown_languages,
            query_parser=query_parser,
            splitter_additional_chars=splitter_additional_chars,
            splitter_casefolding=splitter_casefolding)
        # ``languages`` is kept in the form the caller supplied.
        self.languages = languages
        self.use_stemmer = use_stemmer
        self.dedicated_storage = dedicated_storage
        self.ranking = ranking
        self.use_normalizer = use_normalizer
        self.use_stopwords = use_stopwords
        self.interface = interface
        self.storage = storage
        self.autoexpand_limit = autoexpand_limit
        self.default_fields = _default_fields
        self._fields = _fields
        self.splitter = splitter
        self.lexicon = lexicon
        self.index_unknown_languages = index_unknown_languages
        self.query_parser = query_parser
        self.splitter_additional_chars = splitter_additional_chars
        self.splitter_casefolding = splitter_casefolding
        self._asIFSet = asIFSet

    def clear(self):
        """Clear the wrapped index."""
        self._index.clear()

    def documentCount(self):
        """See interface IStatistics

        Reported as the length of the first field's storage.
        """
        return len(self._index.getStorage(self.default_fields[0]))

    def wordCount(self):
        """See interface IStatistics

        Reported as the length of the lexicon.
        """
        return len(self._index.getLexicon())

    def index_doc(self, docid, value):
        """See interface IInjection

        Nothing is indexed when ``value`` is None.
        """
        if value is not None:
            self._index.index_object(value, docid)

    def unindex_doc(self, docid):
        """See interface IInjection
        """
        self._index.unindex_object(docid)

    def apply(self, query):
        """Return the docids matching ``query``.

        ``query`` may be the query itself or a dict carrying it under the
        key 'query'; any further dict entries become keyword arguments of
        ``Index.search`` (KeyError if 'query' is missing).  The result is
        an IFSet of the docids unless ``asIFSet`` was false.
        """
        kw = {}
        if isinstance(query, dict):
            # Copy first so that the caller's dict is not modified.
            kw.update(query)
            query = kw.pop('query')
        res = self._index.search(query, **kw).getDocids()
        if self._asIFSet:
            return BTrees.IFBTree.IFSet(res)
        return res