Exemplo n.º 1
0
 def updateIndexes(self):
     """Set up the audit catalog and mirror the indexes of ``portal_catalog``.

     When ``audit_lexicon`` is missing (or falsy) on this object, the
     audit-specific indexes, the metadata columns and the lexicon itself
     are created first. Afterwards every index of ``portal_catalog`` that
     this catalog does not have yet is added under the same name and meta
     type, except ``DateRecurringIndex`` indexes, which are skipped.
     """
     if not getattr(self, 'audit_lexicon', None):
         # Fresh install: audit indexes, metadata columns, then the lexicon.
         for index_name, index_type in (
             ('last_audited_date', 'DateIndex'),
             ('audited_action', 'KeywordIndex'),
         ):
             self.addIndex(index_name, index_type)
         for column in (
             'Title', 'id', 'UID', 'last_audited_date', 'audited_action',
         ):
             self.addColumn(column)
         lexicon = PLexicon(
             'audit_lexicon', '',
             HTMLWordSplitter(), CaseNormalizer(), StopWordRemover())
         self._setObject('audit_lexicon', lexicon)

     source_indexes = portal_api.get_tool('portal_catalog')._catalog.indexes
     for name, index in source_indexes.items():
         if name in self._catalog.indexes.keys():
             # Already present here; leave it alone.
             continue
         meta_type = index.meta_type
         if meta_type == 'DateRecurringIndex':
             continue
         if meta_type != 'ZCTextIndex':
             self.addIndex(name, meta_type)
             continue
         # Text indexes are created with an extras record that names the
         # attribute to index, the ranking algorithm and the lexicon.
         extras = Empty()
         extras.doc_attr = name
         extras.index_type = 'Okapi BM25 Rank'
         extras.lexicon_id = 'audit_lexicon'
         self.addIndex(name, meta_type, extras)
Exemplo n.º 2
0
class MySplitter:
    """Callable that splits text and maps the words through a stop dictionary."""

    def __init__(self):
        # NOTE(review): the ``_v_`` prefix looks like the ZODB convention for
        # volatile (non-persisted) attributes -- confirm against the base class.
        self._v_splitter = HTMLWordSplitter()

    def __call__(self, text, stopdict, *args, **kwargs):
        """Split *text* and translate each word through *stopdict*.

        A word found in *stopdict* is replaced by its mapped value; any other
        word is kept as is. Falsy results (for example words mapped to
        ``None``) are dropped. Extra positional and keyword arguments are
        accepted and ignored.
        """
        tokens = self._v_splitter._split(text)
        translated = (stopdict.get(token, token) for token in tokens)
        return filter(None, translated)
Exemplo n.º 3
0
class MySplitter:
    """Word splitter wrapper that applies a stop-word dictionary to its output."""

    def __init__(self):
        # The wrapped splitter does the actual tokenizing in ``__call__``.
        self._v_splitter = HTMLWordSplitter()

    def __call__(self, text, stopdict, *args, **kwargs):
        """Return the words of *text* after lookup in *stopdict*.

        Each word produced by the wrapped splitter is replaced by
        ``stopdict.get(word, word)``; results that are falsy are filtered
        out. Any additional arguments are ignored.
        """
        split_words = self._v_splitter._split(text)
        return filter(None, map(lambda word: stopdict.get(word, word), split_words))
Exemplo n.º 4
0
def make_zc_index():
    """Build and return a ``ZCTextIndex`` named ``"read"``.

    The constructor is fed two throwaway attribute holders: one carrying the
    index options (``doc_attr`` and ``lexicon_id``) and one standing in for
    the calling container, which exposes a ``Lexicon`` as its ``lexicon``
    attribute.
    """
    class Struct:
        """Empty attribute holder."""

    # Stand-in for the container; presumably the index resolves
    # ``lexicon_id`` against it -- verify against ZCTextIndex.__init__.
    container = Struct()
    container.lexicon = Lexicon(HTMLWordSplitter(), StopWordRemover())

    options = Struct()
    options.lexicon_id = "lexicon"
    options.doc_attr = "read"

    return ZCTextIndex("read", options, container)
Exemplo n.º 5
0
 def enumerateLexicons(self):
     """Return the lexicon definitions as a tuple of tuples.

     Each inner tuple starts with the lexicon id and is followed by the
     pipeline elements for that lexicon: a splitter, a ``CaseNormalizer``
     and a ``StopWordRemover``. The plain-text lexicon uses ``Splitter``,
     the HTML lexicon uses ``HTMLWordSplitter``.
     """
     plaintext = (
         'plaintext_lexicon', Splitter(), CaseNormalizer(), StopWordRemover(),
     )
     htmltext = (
         'htmltext_lexicon', HTMLWordSplitter(), CaseNormalizer(),
         StopWordRemover(),
     )
     return (plaintext, htmltext)
Exemplo n.º 6
0
 def testSplitterLocaleAwareness(self):
     """Check that both splitters keep German umlaut words intact.

     The test switches the process-wide locale to a German one, runs the
     words through ``Splitter`` and then ``HTMLWordSplitter``, and expects
     them back unchanged. If the German locale is not available the test
     returns early without asserting anything.

     The original locale is restored in a ``finally`` clause so that a
     failing assertion (or an exception raised by a splitter) cannot leak
     the German locale into tests that run afterwards.
     """
     from Products.ZCTextIndex.HTMLSplitter import HTMLWordSplitter
     import locale
     saved_locale = locale.setlocale(locale.LC_ALL)  # query only, no change
     # The German locale name is platform specific.
     if sys.platform != 'win32':
         german_locale = 'de_DE.ISO8859-1'
     else:
         german_locale = 'German_Germany.1252'
     try:
         locale.setlocale(locale.LC_ALL, german_locale)
     except locale.Error:
         return  # This test doesn't work here :-(
     try:
         expected = [
             'm\xfclltonne', 'waschb\xe4r', 'beh\xf6rde', '\xfcberflieger'
         ]
         words = [" ".join(expected)]
         words = Splitter().process(words)
         self.assertEqual(words, expected)
         # The second splitter is fed the already-split words.
         words = HTMLWordSplitter().process(words)
         self.assertEqual(words, expected)
     finally:
         locale.setlocale(locale.LC_ALL, saved_locale)  # always restore
Exemplo n.º 7
0
    def __init__(self, id='Help', title=''):
        """Store *id* and *title* and build the embedded search catalog.

        The catalog gets a lexicon, a ``SearchableText`` text index using
        ``OkapiIndex``, two keyword indexes (``categories`` and
        ``permissions``) and five metadata columns.
        """
        self.id = id
        self.title = title

        catalog = ZCatalog('catalog')
        self.catalog = catalog

        lexicon = PLexicon(
            'lexicon', '',
            HTMLWordSplitter(), CaseNormalizer(), StopWordRemover())
        catalog._setObject('lexicon', lexicon)

        text_index = ZCTextIndex(
            'SearchableText',
            caller=catalog,
            index_factory=OkapiIndex,
            lexicon_id=lexicon.id)

        # Indexes are registered on the inner catalog directly because
        # catalog.addIndex depends on Product initialization.
        catalog._catalog.addIndex('SearchableText', text_index)
        for keyword_name in ('categories', 'permissions'):
            catalog._catalog.addIndex(keyword_name, KeywordIndex(keyword_name))

        for column in ('categories', 'permissions', 'title_or_id', 'url', 'id'):
            catalog.addColumn(column)
Exemplo n.º 8
0
 def __init__(self):
     """Create an ``HTMLWordSplitter`` and keep it on ``_v_splitter``."""
     splitter = HTMLWordSplitter()
     self._v_splitter = splitter
Exemplo n.º 9
0
 def __init__(self):
     """Attach a fresh ``HTMLWordSplitter`` instance as ``_v_splitter``."""
     # NOTE(review): ``_v_`` looks like the ZODB volatile-attribute
     # convention -- confirm the enclosing class is persistent.
     html_splitter = HTMLWordSplitter()
     self._v_splitter = html_splitter