Python Frequency 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: whoosh.formats

클래스/타입: Frequency

hotexamples.com에서의 예제들: 9

Python Frequency - 9개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python whoosh.formats.Frequency의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Frequency(8)

encode(1)

자주 사용되는 메소드들

Frequency (8)

encode (1)

예제 #1

파일 보기

파일: fields.py 프로젝트: oier/Yaki

    def __init__(self, minsize=2, maxsize=4, stored=False, field_boost=1.0,
                 tokenizer=None, at=None, queryor=False):
        """
        Configure a field that indexes the N-grams of each word in the text.

        :param minsize: The minimum length of the N-grams.
        :param maxsize: The maximum length of the N-grams.
        :param stored: Whether to store the value of this field with the
            document. Since this field type generally contains a lot of text,
            you should avoid storing it with the document unless you need to,
            for example to allow fast excerpts in the search results.
        :param field_boost: Forwarded unchanged to the ``Frequency`` format
            as its ``field_boost`` argument.
        :param tokenizer: an instance of :class:`whoosh.analysis.Tokenizer`
            used to break the text into words.
        :param at: if 'start', only takes N-grams from the start of the word.
            If 'end', only takes N-grams from the end. Otherwise the default
            is to take all N-grams from each word.
        :param queryor: if True, combine the N-grams with an Or query. The
            default is to combine N-grams with an And query.
        """

        # The analyzer is only needed by the format, so build it inline.
        self.format = Frequency(
            analyzer=NgramWordAnalyzer(minsize, maxsize, tokenizer, at=at),
            field_boost=field_boost,
        )
        self.queryor = queryor
        self.stored = stored

예제 #2

파일 보기

파일: test_postings.py 프로젝트: ChimmyTee/oh-mainline

def test_readwrite():
    """Write postings with FilePostingWriter and read the same items back.

    The postings returned by ``make_postings()`` are written to a file in a
    temporary storage, then read with ``FilePostingReader`` and compared
    against the original list via ``items_as("frequency")``.
    """
    with TempStorage("readwrite") as storage:
        fmt = Frequency()
        expected = make_postings()

        # Write phase: one posting per (id, frequency) pair.
        outfile = storage.create_file("readwrite")
        writer = FilePostingWriter(outfile, blocklimit=8)
        writer.start(fmt)
        for docnum, freq in expected:
            encoded = fmt.encode(freq)
            writer.write(docnum, float(freq), encoded, 0)
        writer.finish()
        writer.close()

        # Read phase: reopen the file and compare what comes back.
        infile = storage.open_file("readwrite")
        reader = FilePostingReader(infile, 0, fmt)
        found = list(reader.items_as("frequency"))
        assert_equal(expected, found)
        infile.close()

예제 #3

파일 보기

파일: test_postings.py 프로젝트: ws-os/oh-mainline

def test_readwrite():
    """Check that postings survive a write/read cycle through a posting file.

    Every pair from ``make_postings()`` is written with a
    ``FilePostingWriter``; a ``FilePostingReader`` over the same file must
    then yield an equal list from ``items_as("frequency")``.
    """
    with TempStorage("readwrite") as storage:
        freq_format = Frequency()
        source_postings = make_postings()

        # Write every posting, then flush and close the writer.
        writer = FilePostingWriter(storage.create_file("readwrite"),
                                   blocklimit=8)
        writer.start(freq_format)
        for doc_id, freq in source_postings:
            writer.write(doc_id, float(freq), freq_format.encode(freq), 0)
        writer.finish()
        writer.close()

        # Reopen the file and verify the contents match what was written.
        readfile = storage.open_file("readwrite")
        reader = FilePostingReader(readfile, 0, freq_format)
        read_back = list(reader.items_as("frequency"))
        assert_equal(source_postings, read_back)
        readfile.close()

예제 #4

파일 보기

    def new_field(self, field_name: str, field_data):
        """
        Record a value for a field of the current document.

        While the schema is not yet marked as defined, the field is first
        registered on the writer as a stored KEYWORD field with a Frequency
        vector. The value is then placed in the pending document under
        ``field_name`` in either case.

        Args:
            field_name (str): Name of the new field
            field_data: Data to put into the field
        """
        if not self.__schema_defined:
            # Only build the field type when it actually has to be registered.
            keyword_type = KEYWORD(stored=True, vector=Frequency())
            self.__writer.add_field(field_name, keyword_type)
        self.__doc[field_name] = field_data

예제 #5

파일 보기

    def __init__(self, minsize=2, maxsize=4, stored=False, field_boost=1.0):
        """
        Configure a field that indexes N-grams of the text.

        :param minsize: The minimum length of the N-grams.
        :param maxsize: The maximum length of the N-grams.
        :param stored: Whether to store the value of this field with the
            document. Since this field type generally contains a lot of text,
            you should avoid storing it with the document unless you need to,
            for example to allow fast excerpts in the search results.
        :param field_boost: Forwarded unchanged to the ``Frequency`` format
            as its ``field_boost`` argument.
        """

        ngram_analyzer = NgramAnalyzer(minsize, maxsize)
        self.format = Frequency(analyzer=ngram_analyzer,
                                field_boost=field_boost)
        self.stored = stored
        # This field type is always marked scorable.
        self.scorable = True

예제 #6

파일 보기

파일: fields.py 프로젝트: gnuaha7/tagfs

 def __init__(self, stored=False, lowercase=False, commas=False,
              scorable=False, unique=False, field_boost=1.0):
     """
     Configure a keyword field backed by a ``Frequency`` format.

     :param stored: Whether to store the value of the field with the
         document.
     :param lowercase: Forwarded to ``KeywordAnalyzer`` as its
         ``lowercase`` argument.
     :param commas: Whether this is a comma-separated field. If this is
         False (the default), it is treated as a space-separated field.
     :param scorable: Whether this field is scorable.
     :param unique: Saved on the instance as ``self.unique``.
     :param field_boost: Forwarded unchanged to the ``Frequency`` format
         as its ``field_boost`` argument.
     """

     self.format = Frequency(
         analyzer=KeywordAnalyzer(lowercase=lowercase, commas=commas),
         field_boost=field_boost,
     )
     self.unique = unique
     self.stored = stored
     self.scorable = scorable

예제 #7

파일 보기

    def _schema(self):
        """Create a schema from this object's ``mingram``/``maxgram`` attributes.

        The schema always has the stored fields ``word`` and ``score``. For
        every size from ``self.mingram`` to ``self.maxgram`` inclusive it
        also has ``start<size>`` and ``end<size>`` (ID fields) and
        ``gram<size>`` (a field using the Frequency format with a
        SimpleAnalyzer).

        :returns: a ``whoosh.fields.Schema`` built from those fields.
        """

        # Imported locally, as in the original, so Whoosh is only loaded
        # when a schema is actually requested.
        from whoosh.fields import Schema, FieldType, ID, STORED
        from whoosh.formats import Frequency
        from whoosh.analysis import SimpleAnalyzer

        # One field-type instance of each kind is shared by all sized fields.
        idtype = ID()
        freqtype = FieldType(Frequency(), SimpleAnalyzer())

        fls = [("word", STORED), ("score", STORED)]
        # ``range`` instead of the Python-2-only ``xrange``: it iterates the
        # same values on Python 2 and does not raise NameError on Python 3.
        for size in range(self.mingram, self.maxgram + 1):
            fls.extend([("start%s" % size, idtype), ("end%s" % size, idtype),
                        ("gram%s" % size, freqtype)])

        return Schema(**dict(fls))

예제 #8

파일 보기

파일: text_interface.py 프로젝트: rbarile17/orange_cb_recsys

 def schema_type(self):
     """Return a stored, comma-separated KEYWORD field type with a Frequency vector."""
     frequency_vector = Frequency()
     return KEYWORD(stored=True, commas=True, vector=frequency_vector)

예제 #9

파일 보기

파일: test_postings.py 프로젝트: sangensong/whoosh-1

def test_frequency_postings():
    """Round-trip sample text through the Frequency format and check the per-term values."""
    text = u("alfa bravo charlie bravo alfa alfa")
    expected = [("alfa", 3), ("bravo", 2), ("charlie", 1)]
    actual = _roundtrip(text, Frequency(), "frequency")
    assert actual == expected