Example #1
def test_accent(text, accent):
    # This checks for correct handling of feature fields containing commas as reported in #13
    tagger = Tagger()
    tokens = tagger.parseToNodeList(text)
    # Skip if UnidicFeatures17 is used because it doesn't have 'atype' attribute
    if tokens and isinstance(tokens[0].feature, UnidicFeatures17):
        pytest.skip()
    accent_ = [tok.feature.aType for tok in tokens]
    assert accent_ == accent
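The comment above refers to aType (accent type) values, which can themselves contain commas. A minimal sketch for inspecting that field with a default Tagger (the sample sentence is a placeholder, not taken from the test data):

from fugashi import Tagger

tagger = Tagger()
for tok in tagger.parseToNodeList("外国人参政権"):
    # aType may hold comma-separated accent candidates, the case the test guards against
    print(tok.surface, tok.feature.aType)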
Example #2
class Tokenizer():
    def __init__(self):
        self.tagger = Tagger("-Owakati")

    def tokenize(self, text):
        tokens = self.tagger.parse(text).split(" ")
        return tokens
Example #3
def load_fugashi(write_cfg=False):
    try:
        # help python find libmecab.dll, adjust this to fit your env if necessary
        dll_path = None
        for base in sys.path:
            x = os.path.join(base, "fugashi")
            if os.path.exists(os.path.join(x, "cli.py")) and not dll_path:
                dll_path = x
            x2 = os.path.join(x, "../../../lib/site-packages/fugashi")
            if os.path.exists(x2):
                dll_path = x2
                break

        if not dll_path:
            raise Exception("could not find fugashi installation path")

        if WINDOWS:
            os.add_dll_directory(dll_path)

        from fugashi import Tagger

        dicrc = os.path.join(dll_path, "dicrc")
        if write_cfg:
            with open(dicrc, "wb") as f:
                f.write("\n".join([
                    r"node-format-yomi = %f[9] ",
                    r"unk-format-yomi = %m",
                    r"eos-format-yomi  = \n",
                    "",
                ]).encode("utf-8"))

        wakati = Tagger("-Owakati")
        yomi = Tagger("-Oyomi -r " + dicrc.replace("\\", "\\\\"))

        # import MeCab
        # wakati = MeCab.Tagger('-Owakati')
        info("found fugashi")
        return wakati, yomi
    except Exception:
        import traceback

        warn("could not load fugashi:\n" + traceback.format_exc() + "-" * 72 +
             "\n")
Example #4
def main():
    text = 'softbank'
    tagger = Tagger()
    gtagger = GenericTagger()

    print('Tagger:')
    print(tagger.parse(text))
    for word in tagger(text):
        print(word.surface)
        print(word.feature)
    print()

    print('GenericTagger:')
    print(gtagger.parse(text))
    for word in gtagger(text):
        print(word.surface)
        print(word.feature)
    print()
    print('DONE')
Example #5
def main():
    tagger = Tagger()
    wakati_tagger = Tagger('-Owakati')
    text = '私はご飯を食べます。'
    
    result = wakati_tagger.parse(text)
    print('result1(parse + wakati):')
    print(result)
    print(type(result))
    print()

    result = tagger.parse(text)
    print('result2(parse):')
    print(result)
    print(type(result))
    print()

    result = wakati_tagger(text)
    print('result3(__call__ + wakati):')
    print(result)
    print(type(result))
    print(inspect.getmembers(result[0]))
    print(type(result[0]))
    print()

    result = tagger(text)
    print('result4(__call__):')
    print(result)
    print(type(result))
    print(inspect.getmembers(result[0]))
    print(type(result[0]))
    print()
    print('DONE')
Example #6
File: cli.py Project: polm/fugashi
def info():
    """Print configuration info."""
    args = ' '.join(sys.argv[1:])
    try:
        tagger = GenericTagger(args, quiet=True)
    except RuntimeError:
        tagger = Tagger(args)
    #TODO get the fugashi version here too
    print("Fugashi dictionary info:")
    print("-----")
    for di in tagger.dictionary_info:
        for field in 'version size charset filename'.split():
            print( (field + ':').ljust(10), di[field])
        print('-----')
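The loop above documents the fields exposed by dictionary_info. A small standalone sketch reading the same fields directly:

from fugashi import Tagger

tagger = Tagger()
for di in tagger.dictionary_info:
    print(di["filename"], di["charset"], di["version"], di["size"])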
Example #7
def main():
    tagger = Tagger()
    neologd_tagger = Tagger('-d /usr/lib/x86_64-linux-gnu/mecab/dic/mecab-unidic-neologd')

    text = '私は、渋谷ストリームでランチを食べる。'
    print('unidic:')
    print(tagger.parse(text))
    print()

    print('unidic-neologd:')
    print(neologd_tagger.parse(text))
    print('DONE')
Example #8
File: cli.py Project: polm/fugashi
def main():
    """
    This is a simple wrapper for fugashi so you can test it from the command line.
    Like the mecab binary, it treats each line of stdin as one sentence. You can
    pass tagger arguments here too.
    """
    args = ' '.join(sys.argv[1:])

    # This should work if you specify a different dictionary,
    # but it should also work with the pip unidic.
    # Try the GenericTagger and then try the Unidic tagger.
    try:
        tagger = GenericTagger(args, quiet=True)
    except RuntimeError:
        tagger = Tagger(args)

    for line in fileinput.input([]):
        print(tagger.parse(line.strip()))
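The docstring above describes the line-per-sentence behavior. A minimal sketch of the same loop over an in-memory list instead of stdin (the sentences are placeholders):

from fugashi import Tagger

tagger = Tagger()
for line in ["今日はいい天気です。", "明日は雨かもしれない。"]:
    print(tagger.parse(line.strip()))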
Example #9
def test_pos(text, tags):
    # There should be a pos property when using the default tagger
    tagger = Tagger()
    tags_ = [tok.pos for tok in tagger(text)]
    assert tags == tags_
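A small sketch of the pos property the comment refers to, printing it next to each surface (the sentence is a placeholder, not a test fixture):

from fugashi import Tagger

tagger = Tagger()
for tok in tagger("麩菓子は、麩を主材料とした菓子。"):
    print(tok.surface, tok.pos)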
Example #10
def test_invalid_args():
    # Invalid args will give a NULL pointer for the Tagger object
    # don't try to use the null object!
    with pytest.raises(RuntimeError):
        tagger = Tagger('-fail')
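A hedged sketch of guarding against that RuntimeError in application code, falling back to a default Tagger (the same fallback pattern the cli.py examples use):

from fugashi import Tagger

try:
    tagger = Tagger('-fail')
except RuntimeError:
    # invalid arguments raise instead of returning a usable tagger
    tagger = Tagger()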
Example #11
def test_nbest(text, saved):
    tagger = Tagger('-Owakati')
    assert tagger.nbest(text, 2) == saved
Example #12
def test_tokens(text, saved):
    # testing the token objects is tricky, so instead just check surfaces
    #TODO: maybe save serialized nodes to compare?
    tagger = Tagger()
    tokens = [str(tok) for tok in tagger(text)]
    assert tokens == saved
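One possible way to serialize nodes for comparison, as the TODO above suggests; the chosen fields (surface, pos, lemma) are an assumption, not what the test suite actually does:

from fugashi import Tagger

tagger = Tagger()
serialized = [
    {"surface": tok.surface, "pos": tok.pos, "lemma": tok.feature.lemma}
    for tok in tagger("これはテストです")
]
print(serialized)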
Example #13
def test_wakati(text, wakati):
    tagger = Tagger('-Owakati')
    assert tagger.parse(text) == wakati
from typing import Tuple, TypeVar

from fugashi import Tagger
from dataclasses import dataclass
from jamdict import Jamdict, jmdict
from japaneseverbconjugator.src.constants.EnumeratedTypes import VerbClass
from sudachipy import dictionary  # assumed source of dictionary.Dictionary() below
import googletrans
import jconj.conj as jconj

SudachiPos = Tuple[str, str, str, str, str, str]
K = TypeVar("K")
V = TypeVar("V")

CT = jconj.read_conj_tables("./jconj/data")
JMDICT_ABBREV_MAP = {v: k for k, vs in CT["kwpos"].items() for v in vs}
JMDICT_ABBREV_MAP["expressions (phrases, clauses, etc.)"] = "exp"

tokenizer_obj = dictionary.Dictionary().create()
tagger = Tagger("-Owakati")
jmd = Jamdict()
google_translate = googletrans.Translator()

SUDACHI_POS_MAP = {
    "感動詞": "interjection",
    "記号": "symbol",
    "補助記号": "supplementary symbol",
    "名詞": "noun",
    "接尾辞": "suffix",
    "助詞": "particle",
    "形容詞": "adjective",  # "i-adjective",
    "助動詞": "auxiliary verb",
    "代名詞": "pronoun",
    "空白": "blank space",
    "動詞": "verb",
#!/usr/bin/env python
from fugashi import Tagger

tt = Tagger()
from collections import Counter

wc = Counter()

for line in open('wagahai.txt'):
    for word in tt.parseToNodeList(line.strip()):
        wc[word.surface] += 1
class Tokenizer:
    def __init__(self):
        self.tagger = Tagger("-Owakati")
Example #17
def test_accent(text, accent):
    # This checks for correct handling of feature fields containing commas as reported in #13
    tagger = Tagger()
    accent_ = [tok.feature.aType for tok in tagger.parseToNodeList(text)]
    assert accent_ == accent
Example #18
"""
vocabulaire = []

phrase = "12345"

liste_mots = []
for i in range(len(phrase)+1):
    for j in range(i,len(phrase)+1):
        mot = phrase[i:j]
        if mot in vocabulaire: 
            liste_mots.append(mot)
"""
import numpy as np          
from fugashi import Tagger

tagger = Tagger('-Owakati')




text = "今日はパリから東京まで散歩するつもりだ"

text = "この暑い焼き鳥お食べ次第すぐにビールお飲みます"

text = '僕は自分中心'

text = "この暑い焼き鳥お食べ次第すぐにビールお飲みます"

text = 'でないと'
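The commented-out block at the top of this example enumerates every substring of phrase and keeps the ones found in vocabulaire. A runnable sketch of the same idea (the sample values are placeholders):

def match_substrings(phrase, vocabulary):
    found = []
    for i in range(len(phrase) + 1):
        for j in range(i, len(phrase) + 1):
            substring = phrase[i:j]
            if substring in vocabulary:
                found.append(substring)
    return found

print(match_substrings("12345", {"12", "345"}))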

Example #19
def recognize_image(image_file, clipboard_buffer):
    """Returns document bounds given an image."""
    client = vision.ImageAnnotatorClient()

    with io.open(image_file, "rb") as image_file:
        content = image_file.read()

    image = types.Image(content=content)

    response = client.document_text_detection(image=image)
    document = response.full_text_annotation
    texts = response.text_annotations

    s = wx.ScreenDC()
    ss_x1 = c1x + c1x_delta
    ss_x2 = c2x + c2x_delta
    ss_y1 = c1y + c1y_delta
    ss_y2 = c2y + c2y_delta
    global mode

    console_output = ""
    table = Table(show_header=True,
                  header_style="bold magenta",
                  box=box.MINIMAL_DOUBLE_HEAD)
    table.add_column("日本語", style="dim")
    table.add_column(mode)
    if mode == "Vocab":
        table.add_column("読み方")
        table.add_column("意味")
    for page in document.pages:
        for block in track(page.blocks):
            results = []
            results.append([])
            for paragraph in block.paragraphs:
                for word in paragraph.words:
                    for symbol in word.symbols:
                        results[-1].append(symbol.text)

            bound = block.bounding_box
            start_x = bound.vertices[0].x
            start_y = bound.vertices[0].y

            width = bound.vertices[2].x - bound.vertices[0].x
            height = bound.vertices[2].y - bound.vertices[0].y

            s.Pen = wx.Pen("#FF0000")
            s.SetTextForeground((255, 0, 0))
            s.SetTextBackground((0, 0, 0))
            s.Brush = wx.Brush(wx.Colour(255, 255, 255))

            s.SetFont(
                wx.Font(
                    12,
                    wx.FONTFAMILY_DECORATIVE,
                    wx.FONTSTYLE_NORMAL,
                    wx.FONTWEIGHT_BOLD,
                ))

            ocr_results = "".join(results[-1])

            clipboard_buffer = clipboard_buffer + ocr_results
            clipboard_buffer = clipboard_buffer + "\n"
            if mode == "Romaji":
                katsu = cutlet.Cutlet()
                hepburn_block = katsu.romaji(ocr_results)
                table.add_row(ocr_results, hepburn_block)
                hepburn_block = "\n".join(textwrap.wrap(hepburn_block, 25))

            if mode == "Vocab":
                tagger = Tagger("-Owakati")

                nl_separated_block = []
                for word in tagger(ocr_results):
                    if word.char_type == 2:
                        results = jmd.lookup(str(word.feature.lemma))
                        meaning = " "
                        for k in range(len(results.entries)):
                            result = results.entries[k]
                            if k > 0:
                                meaning = meaning + "\n "
                            meaning = (meaning +
                                       f"[bold red]{str(k + 1)}. [/bold red]" +
                                       " \\ ".join([
                                           str(sense.gloss[0])
                                           for sense in result.senses
                                       ]))
                        console_output = console_output + "\t".join([
                            str(word),
                            "『" + str(word.feature.kana) + "』",
                            str(meaning),
                            "\n",
                        ])
                        nl_separated_block.append("\t".join([
                            str(word),
                            "『" + str(word.feature.kana) + "』",
                            str(meaning),
                        ]))
                        table.add_row(
                            str(word),
                            str(word.feature.lemma),
                            "『" + str(word.feature.kana) + "』",
                            str(meaning),
                        )
                hepburn_block = "\n".join(nl_separated_block)
                # table.add_row(ocr_results, hepburn_block)

            if mode == "Google":
                translator = Translator()
                translated = translator.translate(ocr_results).text
                table.add_row("\n".join(textwrap.wrap(ocr_results, 25)),
                              translated)
                hepburn_block = "\n".join(textwrap.wrap(translated, 25))

            if mode == "DeepL":
                url = "https://api.deepl.com/v2/translate"
                response = requests.get(
                    url,
                    params={
                        "auth_key": deepL_auth,
                        "text": ocr_results,
                        "target_lang": "EN",
                    },
                )
                result = response.json()
                translated = result["translations"][0]["text"]
                table.add_row("\n".join(textwrap.wrap(ocr_results, 25)) + "\n",
                              translated)
                hepburn_block = "\n".join(textwrap.wrap(translated, 40))

            nl_separated_block = hepburn_block.split("\n")
            max_x_bound = (
                max([s.GetTextExtent(line)[0]
                     for line in nl_separated_block]) + 3)
            max_y_bound = (
                s.GetTextExtent(hepburn_block)[1] * len(nl_separated_block) +
                3)
            w, h, = s.GetTextExtent(hepburn_block)

            # modify this with dpi scale when screen device context is fixed
            s.DrawRectangle(ss_x1 + start_x - 3, ss_y1 + start_y - 3,
                            max_x_bound, max_y_bound)
            s.DrawText(hepburn_block, ss_x1 + start_x, ss_y1 + start_y)
    console.print(table)
    return clipboard_buffer
Example #20
import pandas, regex, functools
from fugashi import Tagger
from pykakasi import kakasi
from collections import OrderedDict
import re


from ja_sentence_segmenter.common.pipeline import make_pipeline
from ja_sentence_segmenter.concatenate.simple_concatenator import concatenate_matching
from ja_sentence_segmenter.normalize.neologd_normalizer import normalize
from ja_sentence_segmenter.split.simple_splitter import split_newline, split_punctuation

df_word = pandas.read_csv("../dict_data/ja/jlpt/JLPT.csv")

fugger = Tagger()

pos_list = []
kana_list = []

df_word = df_word.fillna("NONE")

for index, row in df_word.iterrows():
    index += 1
    if not row['kanji'] == "NONE":
        kanji = row['kanji']
        wf = fugger(kanji)
    else:
        furigana = row['furigana']
        wf = fugger(furigana)
    if len(wf) > 1:
Example #21
def JlptLevel(text):
    # Data import
    df_word = pandas.read_csv("dict_data/ja/jlpt/JLPT.csv")
    df_kanji = pandas.read_csv("dict_data/ja/jlpt/JLPT_Kanji.csv")
    kks = kakasi()
    kks.setMode("J", "H")
    conv = kks.getConverter()

    # Text to words
    sentence_list = segmenter(text)
    fugger = Tagger()
    text_level = 5
    sentence_dicts = []  # per-sentence results; avoids shadowing the built-in dict
    if sentence_list:
        for sentence_num, sentence in enumerate(sentence_list):
            sentence_word_level_count_dict = {5: 0, 4: 0, 3: 0, 2: 0, 1: 0}
            kanji_level_count_dict = OrderedDict({5: 0, 4: 0, 3: 0, 2: 0, 1: 0})
            sentence_num += 1
            word_list = []
            for w in fugger(sentence):
                word = w
                if word:
                    word_original = str(w)
                    word_dict_form = word.feature.lemma
                    word_kanji_hiragana = conv.do(word_original)
                    level = None
                    word_dict_form_hiragana = None

                    if word.feature.pos1 in ["動詞", "助動詞", "形容詞", "形状詞"]:
                        word_dict_form_hiragana = conv.do(word_dict_form)

                    p = regex.compile(r'\p{Script=Han}+') # Kanji unicode coverage
                    if p.findall(word_original): # The original word is written in Kanji
                        # word_dict[word_original]["letter_type"] = "kanji" <- delete later
                        kanji_dict = OrderedDict()
                        for kanji in p.findall(word_original):
                            kanji_single_list = []
                            for kanji_single in kanji:
                                kanji_dict[kanji_single] = {}
                                index_list = df_kanji[df_kanji.kanji == kanji_single].index
                                level_list = []
                                for index in index_list:
                                    level_list.append(df_kanji.loc[index, "jlpt"])
                                if level_list:
                                    level = max(level_list)
                                kanji_dict = OrderedDict({
                                    "kanji_single": kanji_single,
                                    "kanji_level": level,
                                })
                                kanji_single_list.append(kanji_dict)

                        # Check the Kanji word level
                        if word_dict_form_hiragana: # if word can conjugate
                            index_list = df_word[df_word.furigana == word_dict_form_hiragana].index
                        else:
                            index_list = df_word[df_word.kanji == word_original].index
                        level_list = []
                        for index in index_list:
                            if df_word.loc[index].any():
                                level_list.append(df_word.loc[index, "jlpt"])
                        if level_list:
                            level = max(level_list)
                            sentence_word_level_count_dict[level] += 1
                        pos = word.feature.pos1

                        word_dict = {
                            "kanji_elements": kanji_single_list,
                            "word_level": level,
                            "word_pos": pos,
                            "word": word,
                            "word_dict_form": word_dict_form,
                            "word_index": index_list,
                        }
                    else: # The original word is not written in Kanji
                        if word_dict_form_hiragana: # if word can conjugate
                            index_list = df_word[df_word.furigana == word_dict_form_hiragana].index
                        else:
                            index_list = df_word[df_word.furigana == word].index
                        index_list = df_word[df_word.furigana == word_original].index
                        level_list = []
                        for index in index_list:
                            level_list.append(df_word.loc[index, "jlpt"])
                        if level_list:
                            level = max(level_list)
                            sentence_word_level_count_dict[level] += 1
                        pos = word.feature.pos1

                        word_dict = {
                            "word_level": level,
                            "word_pos": pos,
                            "word": word,
                            "word_dict_form": word_dict_form,
                            "word_index": index_list,
                        }
                    word_list.append(word_dict)

            # Define the level of each sentence
            #print(sentence_word_level_count_dict)
            freq_level = max(sentence_word_level_count_dict, key=sentence_word_level_count_dict.get)
            rare_level = min(sentence_word_level_count_dict, key=sentence_word_level_count_dict.get)
            highest_level = 5
            for level in sentence_word_level_count_dict:
                if sentence_word_level_count_dict[level] > 0:
                    highest_level = level

            sentence_level_dict = {
                "max_level": freq_level,
                "min_level": rare_level,
                "highest_level": highest_level,
            }

            sentence_dict = {
                "sentence": sentence,
                "sentence_num": sentence_num,
                "sentence_word_level_count_dict": sentence_word_level_count_dict,
                "sentence_word_level_dict": sentence_level_dict,
                #"sentence_kanji_level": sentence_kanji_level,
                "word_dict": word_list,
            }
            sentence_dicts.append(sentence_dict)

    pprint.pprint(sentence_dicts)
    return sentence_dicts
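JlptLevel leans on two building blocks besides fugashi: the \p{Script=Han} pattern from the regex module to detect kanji, and pykakasi's legacy setMode/getConverter API for hiragana readings. A minimal sketch of just those two pieces (the sample word is a placeholder):

import regex
from pykakasi import kakasi

p = regex.compile(r"\p{Script=Han}+")  # runs of kanji characters
kks = kakasi()
kks.setMode("J", "H")  # kanji to hiragana (legacy pykakasi API)
conv = kks.getConverter()

word = "食べる"
print(p.findall(word))  # kanji runs in the word
print(conv.do(word))    # hiragana reading of the whole word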
#!/usr/bin/env python
from fugashi import Tagger
tt = Tagger('-Owakati')
from collections import Counter

wc = Counter()

for line in open('wagahai.txt'):
    for word in tt.parse(line.strip()).split(' '):
        wc[word] += 1