def main(): fw = open('58.txt', 'w') sys.stdout = fw document = exp50.read('50.txt.xml') collapsed_dependencies = exp57.make_collapsed_dependencies(document) flag = 0 for line in collapsed_dependencies: nsubj_pairs = [] dobj_pairs = [] for item in line: search_nsubj = re.search('<dep type="nsubj">', item) search_dobj = re.search('<dep type="dobj">', item) #print item if search_nsubj: flag = 1 nsubj_pair = [] if search_dobj: flag = 2 dobj_pair = [] if item == '</dep>': flag = 0 if flag == 1: governor = exp57.search_governor(item) dependent = exp57.search_dependent(item) if governor != None: nsubj_pair.append(governor) if dependent != None: nsubj_pair.append(dependent) if len(nsubj_pair) == 2: nsubj_pairs.append(nsubj_pair) if flag == 2: governor = exp57.search_governor(item) dependent = exp57.search_dependent(item) if governor != None: dobj_pair.append(governor) if dependent != None: dobj_pair.append(dependent) if len(dobj_pair) == 2: dobj_pairs.append(dobj_pair) sentences = make_sentence(nsubj_pairs, dobj_pairs) #print sentences for sentence in sentences: if len(sentence) == 2: print sentence[1][1], sentence[1][0], sentence[0]
def main(): fw = open('52.txt', 'w') sys.stdout = fw stemmer = nltk.PorterStemmer() document = exp50.read('51.txt') for line in document: print line, '\t', stemmer.stem(line) fw.close()
def main(): fw = open('53.txt', 'w') sys.stdout = fw document = exp50.read('50.txt.xml') for line in document: search = re.search('<word>(.+)</word>', line) if search: print search.group(1) fw.close()
def main(): fw = open('51.txt', 'w') sys.stdout = fw document = exp50.read('50.txt') for line in document: string = re.sub('\s', '\n', line) search = re.search('(\w+\.|\w+\?|\w+!|\w+\:|\w+\;)', string) if search: print string print '\n'.strip() else: print string fw.close()
def main(): fw = open('54.txt', 'w') sys.stdout = fw document = exp50.read('50.txt.xml') for line in document: search1 = re.search('<word>(\w+)</word>', line) search2 = re.search('<lemma>(\w+)</lemma>', line) search3 = re.search('<POS>(\w+)</POS>', line) if search1: print search1.group(1), '\t', if search2: print search2.group(1), '\t', if search3: print search3.group(1) fw.close()
def main(): fw = open('copy.txt', 'w') sys.stdout = fw document = exp50.read('50.txt.xml') parse_trees = s_list(document) for parse_tree in parse_trees[1]: add_space_list = add_space(parse_tree) s = make_parse_list(add_space_list) np = make_np_list(s) for item in np: #print item np_word = extract_np(item) print ' '.join(np_word) #print np_word fw.close()
def main(): fw = open('57.dot', 'w') sys.stdout = fw document = exp50.read('50.txt.xml') collapsed_dependencies = make_collapsed_dependencies(document) print 'digraph sample{' print '', 'graph [rankdir = LR];' for line in collapsed_dependencies[2]: governor = search_governor(line) dependent = search_dependent(line) if governor != None: print governor, '->', if dependent != None: print dependent print '}' fw.close()
# !/usr/bin/python # coding:UTF-8 # 6-(55):固有表現抽出 #入力文中の人名をすべて抜き出せ. import re import sys import exp50 if __name__ == "__main__": fw = open('55.txt', 'w') sys.stdout = fw document = exp50.read('50.txt.xml') sentence = [] flag = 0 for line in document: search1 = re.search('<token id=".+">', line) search2 = re.search('</token>', line) if search1: word = [] flag = 1 if search2: sentence.append(word) flag = 0 if flag == 1: word.append(line) for word in sentence: if '<NER>PERSON</NER>' in word: