Exemplo n.º 1
0
def main():
    """Demonstrate fugashi Tagger output: parse() vs __call__, with and
    without the '-Owakati' (space-separated surface form) option."""
    plain = Tagger()
    wakati = Tagger('-Owakati')
    sentence = '私はご飯を食べます。'

    def show(label, value, members=False):
        # Print one result with its type; optionally inspect the first token.
        print(label)
        print(value)
        print(type(value))
        if members:
            print(inspect.getmembers(value[0]))
            print(type(value[0]))
        print()

    show('result1(parse + wakati):', wakati.parse(sentence))
    show('result2(parse):', plain.parse(sentence))
    show('result3(_call_+wakati):', wakati(sentence), members=True)
    show('result4(_call_):', plain(sentence), members=True)
    print('DONE')
Exemplo n.º 2
0
def main():
    """Parse one sentence with the default unidic dictionary and with
    unidic-neologd, printing both analyses for comparison."""
    neologd = Tagger('-d /usr/lib/x86_64-linux-gnu/mecab/dic/mecab-unidic-neologd')
    default = Tagger()
    sentence = '私は、渋谷ストリームでランチを食べる。'

    print('unidic:')
    print(default.parse(sentence))
    print()

    print('unidic-neologd:')
    print(neologd.parse(sentence))
    print('DONE')
class Tokenizer():
    """Thin wrapper around fugashi's wakati (space-separated) mode."""

    def __init__(self):
        # A single Tagger instance is created once and reused by tokenize().
        self.tagger = Tagger("-Owakati")

    def tokenize(self, text):
        """Return *text* split into surface-form tokens."""
        parsed = self.tagger.parse(text)
        return parsed.split(" ")
Exemplo n.º 4
0
def main():
    """Contrast Tagger and GenericTagger analyses of one token,
    printing each word's surface form and feature string."""
    sentence = 'softbank'
    for label, analyzer in (('Tagger:', Tagger()),
                            ('GenericTagger:', GenericTagger())):
        print(label)
        print(analyzer.parse(sentence))
        for token in analyzer(sentence):
            print(token.surface)
            print(token.feature)
        print()
    print('DONE')
Exemplo n.º 5
0
def test_wakati(text, wakati):
    """Assert that wakati-mode parsing of *text* produces *wakati*."""
    assert Tagger('-Owakati').parse(text) == wakati
Exemplo n.º 6
0
text = 'でないと'


# Strip whitespace and punctuation before analysis. Each replace must chain
# on filter_text: the original code restarted from `text` every time, so only
# the very last replacement actually survived.
filter_text = text.replace(' ', '')
filter_text = filter_text.replace('?', '')
filter_text = filter_text.replace('!', '')
filter_text = filter_text.replace('...', '')
filter_text = filter_text.replace('(', '')
filter_text = filter_text.replace(')', '')

text_binary = np.zeros(len(filter_text))  # 1 where words were found at that position, else 0


text_kana = text  # Text that will be transliterated into kana

# NOTE(review): `tagger` is not defined in this snippet — presumably a
# fugashi/MeCab Tagger created elsewhere; confirm before running standalone.
tagger.parse(text)
# => '麩 菓子 は 、 麩 を 主材 料 と し た 日本 の 菓子 。'

 
def data_preparation(csv):
    """Split ';'-separated lines into parallel lists of rules and explanations.

    Lines containing a literal 'null' field, or with fewer than two fields,
    are skipped.

    Parameters
    ----------
    csv : iterable of str
        Raw lines (e.g. from an open file).

    Returns
    -------
    tuple[list[str], list[str]]
        (regles, explications) — first and second field of each kept line.
    """
    regles = []
    explications = []
    for l in csv:
        # Bug fix: str.strip() returns a new string; the original discarded
        # the result, so trailing newlines leaked into the last field.
        ligne = l.strip().split(';')
        if len(ligne) < 2:
            # Malformed line: not enough fields to index safely.
            continue
        if 'null' not in ligne:
            regles.append(ligne[0])
            explications.append(ligne[1])

    return regles, explications
#!/usr/bin/env python
"""Count word frequencies in wagahai.txt using fugashi's wakati tokenizer."""
from fugashi import Tagger
from collections import Counter

tt = Tagger('-Owakati')

wc = Counter()

# Bug fix: the original left the file handle open; a context manager closes
# it deterministically. Encoding is pinned to UTF-8 because the corpus is
# Japanese text and the locale default may differ on some platforms.
with open('wagahai.txt', encoding='utf-8') as f:
    for line in f:
        for word in tt.parse(line.strip()).split(' '):
            wc[word] += 1