Example #1
    def _makeOne(self, family=None):
        from zope.index.text.lexicon import Lexicon
        from zope.index.text.lexicon import Splitter
        if family is None:
            family = self._getBTreesFamily()
        lexicon = Lexicon(Splitter())
        return self._getTargetClass()(lexicon, family=family)
Example #2
    def tfIdfBlock(self, data, field):
        '''Creates TF/IDF canopy of a given set of data'''

        # Pipeline element whose stop-word set is captured from the enclosing
        # method when the class body is evaluated
        class CustomStopWordRemover(object):
            stop_words = self.stop_words[field].copy()

            def process(self, lst):
                return [w for w in lst if w not in self.stop_words]

        index = TextIndex(Lexicon(Splitter(), CustomStopWordRemover()))

        # Replace the default index with a cosine-similarity index that shares the lexicon
        index.index = CosineIndex(index.lexicon)

        index_to_id = {}
        base_tokens = {}

        for i, (record_id, doc) in enumerate(data, 1):
            index_to_id[i] = record_id
            base_tokens[i] = doc
            index.index_doc(i, doc)

        canopies = (tfidf._createCanopies(index,
                                          base_tokens,
                                          threshold,
                                          field)
                    for threshold in self.tfidf_fields[field])

        for canopy in canopies:
            key, index_canopy = canopy
            id_canopy = dict((index_to_id[k], index_to_id[v])
                             for k, v in index_canopy.items())
            self.canopies[key] = defaultdict(str, id_canopy)
Example #3
File: catalog.py Project: Jickelsen/Arche
def _default_indexes():
    return {
        'title': CatalogFieldIndex(get_title),
        'description': CatalogFieldIndex(get_description),
        'type_name': CatalogFieldIndex(get_type_name),
        'sortable_title': CatalogFieldIndex(get_sortable_title),
        'path': CatalogPathIndex(get_path),
        'searchable_text': CatalogTextIndex(
            get_searchable_text,
            lexicon=Lexicon(Splitter(), CaseNormalizer())),
        'uid': CatalogFieldIndex(get_uid),
        'tags': CatalogKeywordIndex(get_tags),
        'search_visible': CatalogFieldIndex(get_search_visible),
        'date': CatalogFieldIndex(get_date),
        'modified': CatalogFieldIndex(get_modified),
        'created': CatalogFieldIndex(get_created),
        'wf_state': CatalogFieldIndex(get_wf_state),
        'workflow': CatalogFieldIndex(get_workflow),
    }.items()
Example #4
    def _makeIndexAndParser(self):
        from zope.index.text.lexicon import Lexicon
        from zope.index.text.lexicon import Splitter
        from zope.index.text.queryparser import QueryParser
        lexicon = Lexicon(Splitter())
        parser = QueryParser(lexicon)
        index = FauxIndex()
        return index, parser
Example #5
File: tfidf.py Project: dwyerk/dedupe
    def __init__(self, field, stop_words=[]):
        self.field = field

        splitter = Splitter()
        stop_word_remover = CustomStopWordRemover(stop_words)
        operator_escaper = OperatorEscaper()
        lexicon = Lexicon(splitter, stop_word_remover, operator_escaper)

        self._index = TextIndex(lexicon)
        self._index.index = CosineIndex(self._index.lexicon)

        self._i_to_id = {}
        self._parseTerms = self._index.lexicon.parseTerms
Example #6
    def __init__(self, lexicon=None, index=None):
        """Provisional constructor.

        This creates the lexicon and index if not passed in.
        """
        _explicit_lexicon = True
        if lexicon is None:
            _explicit_lexicon = False
            lexicon = Lexicon(Splitter(), CaseNormalizer(), StopWordRemover())
        if index is None:
            index = OkapiIndex(lexicon)
        # Prefer an explicitly passed lexicon; otherwise use the index's own lexicon
        self.lexicon = lexicon if _explicit_lexicon else index.lexicon
        self.index = index
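
The constructor above matches the provisional constructor of zope.index.text's TextIndex, which creates the lexicon and index when they are not passed in. Assuming that class, a minimal usage sketch looks like the following; the sample documents and query are illustrative only, not from the source:

from zope.index.text.textindex import TextIndex

index = TextIndex()   # default pipeline: Splitter, CaseNormalizer, StopWordRemover over an OkapiIndex
index.index_doc(1, "The quick brown fox")
index.index_doc(2, "A lazy dog")
results = index.apply("quick fox")   # mapping of document id -> relevance score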
Example #7
    def __init__(self, discriminator, lexicon=None, index=None):
        _lexicon = lexicon
        if lexicon is None:
            _lexicon = Lexicon(
                Splitter(),
                CaseNormalizer(),
                StopWordRemover(),
            )
        if index is None:
            index = OkapiIndex(_lexicon, family=self.family)
        super(TextIndex, self).__init__(discriminator, lexicon, index)
        if lexicon is None:
            self.lexicon = index.lexicon
        self.index = index
        self.clear()
Example #8
def stopWords(data):
    index = TextIndex(Lexicon(Splitter()))

    for i, (_, doc) in enumerate(data, 1):
        index.index_doc(i, doc)

    # (document frequency, word) pairs, most frequent first
    doc_freq = [(len(index.index._wordinfo[wid]), word)
                for word, wid in index.lexicon.items()]

    doc_freq.sort(reverse=True)

    N = float(index.index.documentCount())
    threshold = int(max(1000, N * 0.05))

    stop_words = set()

    # Treat any word appearing in more than `threshold` documents as a stop word
    for frequency, word in doc_freq:
        if frequency > threshold:
            stop_words.add(word)
        else:
            break

    return stop_words
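
A brief usage sketch, not from the source project, showing how the stop words computed above could be fed back into a lexicon pipeline. It assumes stopWords from this example and CustomStopWordRemover from Example #5's module are in scope; the sample records are made up:

from zope.index.text.lexicon import Lexicon
from zope.index.text.lexicon import Splitter
from zope.index.text.textindex import TextIndex

data = [('id1', 'the quick brown fox'),
        ('id2', 'the lazy dog jumps')]

stop_words = stopWords(data)                   # words over the document-frequency threshold (empty for toy data)
remover = CustomStopWordRemover(stop_words)    # pipeline element shown in Example #5
index = TextIndex(Lexicon(Splitter(), remover))

for i, (_, doc) in enumerate(data, 1):
    index.index_doc(i, doc)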
Example #9
    def _makeLexicon(self, *pipeline):
        from zope.index.text.lexicon import Lexicon
        from zope.index.text.lexicon import Splitter
        if not pipeline:
            pipeline = (Splitter(),)
        return Lexicon(*pipeline)
Example #10
    def _makeLexicon(self):
        from zope.index.text.lexicon import Lexicon
        return Lexicon(*self._makePipeline())
Example #11
def _create_full_text_index(self, language):
    lexicon = Lexicon(
        FullTextIndexProcessor(language, self.stemming)
    )
    return OkapiIndex(lexicon)
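
FullTextIndexProcessor is project-specific, so only its role is visible here: it is the sole element of the lexicon's pipeline. In zope.index.text a pipeline element only needs a process() method that maps a sequence of terms to a new sequence, as CustomStopWordRemover in Example #2 illustrates. Below is a minimal, hypothetical element of that kind, using placeholder whitespace splitting and lowercasing rather than the real language-aware processing:

from zope.index.text.lexicon import Lexicon
from zope.index.text.okapiindex import OkapiIndex


class SimpleTextProcessor(object):
    """Hypothetical pipeline element: split on whitespace and lowercase."""

    def process(self, seq):
        terms = []
        for text in seq:
            terms.extend(text.lower().split())
        return terms


lexicon = Lexicon(SimpleTextProcessor())
index = OkapiIndex(lexicon)
index.index_doc(1, "Some Document Text")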