Exemplo n.º 1
0
def main():
    """Emit the result of ``extract`` for every chunk flagged as a noun.

    Reads the file named by ``sys.argv[1]``, parses its lines with
    ``nlp040.parse_text`` and walks every chunk of every sentence.  For
    each chunk where ``contains('名詞')`` is true ('名詞' = noun),
    ``extract(sentence, i)`` is called with the chunk's index; a non-empty
    result is passed to ``p`` as its items joined with ' -> ' plus a
    trailing newline.
    """
    # Use a context manager so the input file is closed deterministically
    # instead of relying on garbage collection.
    with open(sys.argv[1]) as f:
        lines = f.readlines()
    text = nlp040.parse_text(lines)

    for sentence in text:
        for i, chunk in enumerate(sentence):
            if chunk.contains('名詞'):
                out = extract(sentence, i)
                if len(out):
                    p(' -> '.join(out) + '\n')
Exemplo n.º 2
0
def main():
    """Print source-chunk text and chunk text for selected verb chunks.

    Reads the file named by ``sys.argv[1]`` and parses it with
    ``nlp040.parse_text``.  A chunk is selected when it has at least one
    entry in ``srcs``, ``contains('動詞')`` is true ('動詞' = verb), and at
    least one chunk indexed by ``srcs`` satisfies ``contains('名詞')``
    ('名詞' = noun).  For each selected chunk one line is printed: the
    concatenated text of all its source chunks, a tab, then the chunk's
    own text.
    """
    # Close the input file as soon as its lines have been read.
    with open(sys.argv[1]) as f:
        lines = f.readlines()
    text = nlp040.parse_text(lines)
    for sentence in text:
        for chunk in sentence:
            # A generator lets any() stop at the first matching source
            # instead of building a full list first.
            if (len(chunk.srcs) > 0 and chunk.contains('動詞')
                    and any(sentence[idx].contains('名詞')
                            for idx in chunk.srcs)):
                txt = ''.join(sentence[src].text() for src in chunk.srcs)
                print(txt + '\t' + chunk.text())
Exemplo n.º 3
0
def main():
    """Print source-chunk text and chunk text for every chunk with sources.

    Reads the file named by ``sys.argv[1]`` and parses it with
    ``nlp040.parse_text``.  For each chunk whose ``srcs`` is non-empty,
    one line is printed: the concatenated text of the chunks indexed by
    ``srcs``, a tab, then the chunk's own text.
    """
    # Close the input file as soon as its lines have been read.
    with open(sys.argv[1]) as f:
        lines = f.readlines()
    text = nlp040.parse_text(lines)

    for sentence in text:
        for chunk in sentence:
            if len(chunk.srcs) > 0:
                txt = ''.join(sentence[src].text() for src in chunk.srcs)
                print(txt + '\t' + chunk.text())
Exemplo n.º 4
0
def main():
    """Write a Graphviz DOT digraph of chunk links for the first 10 sentences.

    Reads the file named by ``sys.argv[1]``, parses it with
    ``nlp040.parse_text`` and writes a DOT file to ``sys.argv[2]``.  Each
    of the first ten sentences becomes a ``subgraph sentenceN`` block, and
    every chunk with ``dst > 0`` contributes one edge from its own text to
    the text of ``sentence[chunk.dst]``.
    """
    # Close the input file as soon as its lines have been read.
    with open(sys.argv[1]) as f:
        lines = f.readlines()
    text = nlp040.parse_text(lines)

    def quote(label):
        # A raw '"' inside the label would terminate the DOT quoted string
        # early and corrupt the file, so escape it.
        return '"' + label.replace('"', '\\"') + '"'

    with open(sys.argv[2], 'w') as o:
        o.write('digraph 吾輩は猫である {\n')
        for i, sentence in enumerate(text[0:10]):
            o.write('subgraph sentence{0} {{\n'.format(i))
            for chunk in sentence:
                # NOTE(review): presumably a non-positive dst means "no
                # head chunk"; dst == 0 is skipped as well - confirm
                # against nlp040.
                if chunk.dst > 0:
                    o.write(quote(chunk.text()) + ' -> ' +
                            quote(sentence[chunk.dst].text()) + ';\n')
            o.write('}\n')
        o.write('}\n')
Exemplo n.º 5
0
def main():
    """Write verb / particle / source-chunk patterns as tab-separated lines.

    Reads the file named by ``sys.argv[1]``, parses it with
    ``nlp040.parse_text`` and writes to ``sys.argv[2]``.  For every item
    returned by ``chunk.get('動詞')`` ('動詞' = verb), the chunks indexed by
    ``chunk.srcs`` are scanned; each item returned by their
    ``get('助詞')`` ('助詞' = particle) contributes a pair of the
    particle's ``base`` and that source chunk's text.  When at least one
    pair exists, the pairs are sorted by particle base and one line is
    written: the verb's ``base``, the space-joined particle bases, and the
    space-joined source chunk texts, separated by tabs.
    """
    # Close the input file as soon as its lines have been read.
    with open(sys.argv[1]) as f:
        lines = f.readlines()
    text = nlp040.parse_text(lines)

    with open(sys.argv[2], 'w') as o:
        for sentence in text:
            for chunk in sentence:
                for verb in chunk.get('動詞'):
                    # Keep particle and chunk text together as pairs so
                    # sorting cannot desynchronise them.
                    pairs = []
                    for src in chunk.srcs:
                        src_chunk = sentence[src]
                        for particle in src_chunk.get('助詞'):
                            pairs.append((particle.base, src_chunk.text()))
                    if not pairs:
                        continue
                    # Sort on the particle only; the stable sort keeps the
                    # original order among equal particles.
                    pairs.sort(key=lambda pair: pair[0])
                    particles = ' '.join(base for base, _ in pairs)
                    phrases = ' '.join(phrase for _, phrase in pairs)
                    o.write(verb.base + '\t' + particles + '\t' +
                            phrases + '\n')