Exemplo n.º 1
0
def main():
    fw = open('58.txt', 'w')
    sys.stdout = fw

    document = exp50.read('50.txt.xml')
    collapsed_dependencies = exp57.make_collapsed_dependencies(document)

    flag = 0
    
    for line in collapsed_dependencies:
        nsubj_pairs = []
        dobj_pairs = []
        for item in line:
            search_nsubj = re.search('<dep type="nsubj">', item)
            search_dobj = re.search('<dep type="dobj">', item)
            #print item
            if search_nsubj:
                flag = 1
                nsubj_pair = []
            if search_dobj:
                flag = 2
                dobj_pair = []
            if item == '</dep>':
                flag = 0

            if flag == 1:
                governor = exp57.search_governor(item) 
                dependent = exp57.search_dependent(item)

                if governor != None:
                    nsubj_pair.append(governor)
                if dependent != None:
                    nsubj_pair.append(dependent)

                    if len(nsubj_pair) == 2:
                        nsubj_pairs.append(nsubj_pair)
            
            if flag == 2:
                governor = exp57.search_governor(item) 
                dependent = exp57.search_dependent(item)

                if governor != None:
                    dobj_pair.append(governor)
                if dependent != None:
                    dobj_pair.append(dependent)

                    if len(dobj_pair) == 2:
                        dobj_pairs.append(dobj_pair)

        sentences = make_sentence(nsubj_pairs, dobj_pairs)
        #print sentences

        for sentence in sentences:
            if len(sentence) == 2:
                print sentence[1][1], sentence[1][0], sentence[0]
Exemplo n.º 2
0
def main():
    fw = open('52.txt', 'w')
    sys.stdout = fw
    stemmer = nltk.PorterStemmer()

    document = exp50.read('51.txt')
    
    for line in document:
        print line, '\t', stemmer.stem(line)

    fw.close()
Exemplo n.º 3
0
def main():
    fw = open('53.txt', 'w')
    sys.stdout = fw

    document = exp50.read('50.txt.xml')

    for line in document:
        search = re.search('<word>(.+)</word>', line)

        if search:
            print search.group(1)

    fw.close()
Exemplo n.º 4
0
def main():
    fw = open('53.txt', 'w')
    sys.stdout = fw
    
    document = exp50.read('50.txt.xml')
    
    for line in document:
        search = re.search('<word>(.+)</word>', line)

        if search:
            print search.group(1)

    fw.close()
Exemplo n.º 5
0
def main():
    fw = open('51.txt', 'w')
    sys.stdout = fw

    document = exp50.read('50.txt')
    
    for line in document:
        string = re.sub('\s', '\n', line)
        search = re.search('(\w+\.|\w+\?|\w+!|\w+\:|\w+\;)', string)
        if search:
            print string
            print '\n'.strip()
        else:
            print string    

    fw.close()
Exemplo n.º 6
0
def main():
    fw = open('54.txt', 'w')
    sys.stdout = fw

    document = exp50.read('50.txt.xml')
    
    for line in document:
        search1 = re.search('<word>(\w+)</word>', line)
        search2 = re.search('<lemma>(\w+)</lemma>', line)
        search3 = re.search('<POS>(\w+)</POS>', line)

        if search1:
            print search1.group(1), '\t',
        if search2:
            print search2.group(1), '\t',
        if search3:
            print search3.group(1)
        
    fw.close()
Exemplo n.º 7
0
def main():
    fw = open('54.txt', 'w')
    sys.stdout = fw

    document = exp50.read('50.txt.xml')

    for line in document:
        search1 = re.search('<word>(\w+)</word>', line)
        search2 = re.search('<lemma>(\w+)</lemma>', line)
        search3 = re.search('<POS>(\w+)</POS>', line)

        if search1:
            print search1.group(1), '\t',
        if search2:
            print search2.group(1), '\t',
        if search3:
            print search3.group(1)

    fw.close()
Exemplo n.º 8
0
def main():
    fw = open('copy.txt', 'w')
    sys.stdout = fw

    document = exp50.read('50.txt.xml')
    parse_trees = s_list(document)

    for parse_tree in  parse_trees[1]:
        add_space_list = add_space(parse_tree)
        s = make_parse_list(add_space_list)
        np = make_np_list(s)

        for item in np:
            #print item
            np_word = extract_np(item)
            print ' '.join(np_word)
            
        #print np_word
        
    fw.close()
Exemplo n.º 9
0
def main():
    fw = open('copy.txt', 'w')
    sys.stdout = fw

    document = exp50.read('50.txt.xml')
    parse_trees = s_list(document)

    for parse_tree in parse_trees[1]:
        add_space_list = add_space(parse_tree)
        s = make_parse_list(add_space_list)
        np = make_np_list(s)

        for item in np:
            #print item
            np_word = extract_np(item)
            print ' '.join(np_word)

        #print np_word

    fw.close()
Exemplo n.º 10
0
def main():
    fw = open('57.dot', 'w')
    sys.stdout = fw

    document = exp50.read('50.txt.xml')
    collapsed_dependencies = make_collapsed_dependencies(document)
    
    print 'digraph sample{'
    print '', 'graph [rankdir = LR];'

    for line in collapsed_dependencies[2]:
        governor = search_governor(line)
        dependent = search_dependent(line)

        if governor != None:
            print governor, '->',
        if dependent != None:
            print dependent
    print '}'
    
    fw.close()
Exemplo n.º 11
0
# !/usr/bin/python
# coding:UTF-8
# 6-(55): Named entity extraction
# Extract every person name that appears in the input text.

import re
import sys
import exp50

if __name__ == "__main__":
    # Script entry point. From here on everything printed goes to '55.txt'
    # because sys.stdout is replaced by the opened file.
    fw = open('55.txt', 'w')
    sys.stdout = fw

    document = exp50.read('50.txt.xml')
    sentence = []  # collects one list of raw lines per <token> element
    flag = 0  # 1 while inside a <token id="..."> ... </token> element
    
    for line in document:
        search1 = re.search('<token id=".+">', line)
        search2 = re.search('</token>', line)
        if search1:
            # Opening tag: start a fresh line list for this token.
            word = []
            flag = 1
        if search2:
            # Closing tag: store the finished token. flag is cleared before
            # the check below, so the closing line itself is not stored.
            # NOTE(review): `word` is only bound once an opening tag has been
            # seen; a '</token>' line before that would raise NameError.
            sentence.append(word)
            flag = 0
        if flag == 1:
            # Also stores the opening <token ...> line itself.
            word.append(line)

    for word in sentence:
        # `word` is a list of lines, so this is a list-membership test: it is
        # true only when one stored line equals the tag string exactly.
        # NOTE(review): confirm exp50.read yields lines without surrounding
        # whitespace. The body of this branch is missing from this excerpt.
        if '<NER>PERSON</NER>' in word:
Exemplo n.º 12
0
# !/usr/bin/python
# coding:UTF-8
# 6-(55): Named entity extraction
# Extract every person name that appears in the input text.

import re
import sys
import exp50

if __name__ == "__main__":
    fw = open('55.txt', 'w')
    sys.stdout = fw

    document = exp50.read('50.txt.xml')
    sentence = []
    flag = 0

    for line in document:
        search1 = re.search('<token id=".+">', line)
        search2 = re.search('</token>', line)
        if search1:
            word = []
            flag = 1
        if search2:
            sentence.append(word)
            flag = 0
        if flag == 1:
            word.append(line)

    for word in sentence:
        if '<NER>PERSON</NER>' in word: