示例#1
0
def test_extract_word():
    """Check ``mecab.extract_word`` for plain, phrase and root-form modes.

    Each result is compared with ``==``.  The form ``assert got, [...]``
    must not be used here: it only tests that ``got`` is truthy and treats
    the list as the failure message, so a wrong result would still pass.
    """
    # Nouns only.
    got = mecab.extract_word('環境音楽だ', '名詞')
    assert got == [u'環境', u'音楽']
    # With phrase=True the compound noun is expected in addition to its parts.
    got = mecab.extract_word('環境音楽だ', '名詞', phrase=True)
    assert got == [u'環境', u'音楽', u'環境音楽']
    # With rootform=True the verb is expected in its dictionary form.
    got = mecab.extract_word('寝ろ', '動詞', rootform=True)
    assert got == [u'寝る']
示例#2
0
def test_extract_word():
    """Run ``mecab.extract_word`` against a table of inputs and expectations.

    Every row holds the positional arguments, the keyword arguments and
    the list the call is expected to return; rows are checked in order
    with ``assert_equals``.
    """
    cases = (
        (('環境音楽だ', '名詞'), {}, [u'環境', u'音楽']),
        (('環境音楽だ', '名詞'), {'phrase': True},
         [u'環境', u'音楽', u'環境音楽']),
        (('寝ろ', '動詞'), {'rootform': True}, [u'寝る']),
    )
    for positional, keywords, expected in cases:
        assert_equals(mecab.extract_word(*positional, **keywords), expected)
示例#3
0
def respond(text):
    """Yield past posts that responded to a post similar to *text*.

    Three searches are tried in order through
    ``_extract_response_by_search``, and every truthy result is yielded:

    1. the raw text, with ``False`` as the second argument;
    2. the space-joined ``'content_word'`` extraction, with ``True``;
    3. the space-joined ``'名詞,固有名詞'`` extraction, with ``True``.

    NOTE(review): the meaning of the boolean second argument is not
    visible here -- confirm against ``_extract_response_by_search``.
    """
    # Each stage builds its query lazily so that word extraction only
    # happens once the previous search has been fully consumed.
    stages = (
        (lambda: [text], False),
        (lambda: [' '.join(mecab.extract_word(text, 'content_word'))], True),
        (lambda: [' '.join(mecab.extract_word(text, ('名詞,固有名詞',)))],
         True),
    )
    for build_queries, flag in stages:
        for candidate in _extract_response_by_search(build_queries(), flag):
            if not candidate:
                continue
            yield candidate
示例#4
0
def respond(text, *args):
    """Yield past posts that responded to a post similar to *text*.

    The raw text is searched first (second argument ``False``), then the
    space-joined ``'content_word'`` extraction and finally the
    space-joined ``'名詞,固有名詞'`` extraction (both with ``True``).
    Falsy search results are skipped.  Extra positional arguments are
    accepted and ignored.
    """
    def _truthy_hits(query, flag):
        # Run one search and drop empty results.
        for found in _extract_response_by_search([query], flag):
            if found:
                yield found

    for found in _truthy_hits(text, False):
        yield found

    content_words = mecab.extract_word(text, 'content_word')
    for found in _truthy_hits(' '.join(content_words), True):
        yield found

    proper_nouns = mecab.extract_word(text, ('名詞,固有名詞',))
    for found in _truthy_hits(' '.join(proper_nouns), True):
        yield found
示例#5
0
 def count(self, log):
     """Tally the words extracted from every post in *log*.

     For each post the 'text', 'q1' and 'q2' entries are read; a list
     value is joined with newlines and written back to the post.  Every
     line goes through ``self.prepare_for_counting`` and then
     ``mecab.extract_word``.  Each extracted word is looked up in
     ``self.word_ids``, appended to ``self.words`` if not already there,
     and a one-element ``Counter`` of its id is added to both
     ``self.word_counts[id]`` and ``self.unique_wordcounts``.
     """
     for post in log:
         for field in ('text', 'q1', 'q2'):
             if post.get(field):
                 if isinstance(post[field], list):
                     post[field] = '\n'.join(post[field])
                 for raw_line in post[field].splitlines():
                     prepared = self.prepare_for_counting(raw_line)
                     for token in mecab.extract_word(prepared):
                         # Look the id up before touching self.words,
                         # in the same order as the lookups were
                         # originally performed.
                         token_id = self.word_ids[token]
                         if token not in self.words:
                             self.words.append(token)
                         tally = Counter([token_id])
                         for key in tally:
                             self.word_counts[key] += tally
                         self.unique_wordcounts += tally
示例#6
0
 def count(self, log):
     """Tally the words of every post in *log*, skipping ``NG_WORDS``.

     The 'text', 'q1' and 'q2' entries of each post are processed; list
     values are joined with newlines and stored back into the post.  Each
     line is passed to ``self.prepare_for_counting`` and split into words
     by ``mecab.extract_word``.  Words found in ``NG_WORDS`` are ignored.
     For every remaining word its ``self.word_ids`` entry is wrapped in a
     single-element ``Counter`` that is added to ``self.word_counts`` under
     that id and to ``self.unique_wordcounts``; unseen words are appended
     to ``self.words``.
     """
     for entry in log:
         for key in ('text', 'q1', 'q2'):
             if not entry.get(key):
                 continue
             if isinstance(entry[key], list):
                 entry[key] = '\n'.join(entry[key])
             for line in entry[key].splitlines():
                 cleaned = self.prepare_for_counting(line)
                 # Lazy filter: the NG_WORDS check still runs per word,
                 # interleaved with the counting below.
                 allowed = (
                     w for w in mecab.extract_word(cleaned)
                     if w not in NG_WORDS
                 )
                 for word in allowed:
                     ids = [self.word_ids[word]]
                     if word not in self.words:
                         self.words.append(word)
                     per_word = Counter(ids)
                     for word_id in per_word.keys():
                         self.word_counts[word_id] += per_word
                     self.unique_wordcounts += per_word
示例#7
0
def respond(text, *args):
    """Yield replies built from text shared by logs matching *text*.

    The words extracted from *text* are passed to ``kuzuha.search``; the
    'text' entry of every returned log (empty string when missing) is run
    through ``cleansing`` and the results are concatenated.  Each truthy
    item produced by ``get_longest_common_substring`` for that string is
    yielded with the suffix '(;´Д`)'.  Extra positional arguments are
    accepted and ignored.
    """
    keywords = mecab.extract_word(text)
    hits = kuzuha.search(keywords)
    cleaned = []
    for hit in hits:
        cleaned.append(cleansing(hit.get('text', '')))
    joined = ''.join(cleaned)
    for fragment in get_longest_common_substring(joined):
        if not fragment:
            continue
        yield fragment + '(;´Д`)'