def small_text():
    """Run the taxonomy pipeline on a fixed two-sentence sample.

    The sample text is passed through C_NC_TermExtractor, the resulting
    terms are given to a ConceptFormer, and the relations returned by
    RelationExtractor.find_realation are handed to
    ConceptFormer.find_hearst_concepts.  The taxonomy and the number of
    concepts and relations are printed, and the DOT output of
    utils.taxonomy_to_dot is passed to utils.dot_to_image under the name
    'france'.
    """
    text = " ".join(["Travel kills time.", "France is a nice country."])

    # Term extraction first, then concept formation from those terms.
    found_terms = C_NC_TermExtractor(text).compute_cnc()
    concept_former = ConceptFormer()
    concept_former.form_concepts(found_terms)

    # Relations found in the raw text are added on top of the term concepts.
    hearst_triples = list(RelationExtractor.find_realation(text))
    concept_former.find_hearst_concepts(hearst_triples)

    # Single-argument parenthesised print emits the same text as the
    # Python 2 print statement.
    print("Taxonomy: ")
    pprint(concept_former.get_taxonomy())

    # Collect a display name and the triples of every concept in one pass.
    concept_names = []
    triples = []
    for node in list(concept_former.get_taxonomy()):
        concept_names.append(" ".join(node.name))
        triples.extend(node.make_tripels())

    print("no con.: " + str(len(concept_names)))
    print("no rel.: " + str(len(triples)))

    dot_source = utils.taxonomy_to_dot(concept_names, triples)
    utils.dot_to_image(dot_source, 'france')
def long_text():
    """Run the taxonomy pipeline on the corpus in "corpora/snakes.corp".

    The corpus text obtained from CorpusReader is passed through
    C_NC_TermExtractor and the resulting terms are given to a
    ConceptFormer.  The taxonomy and the number of concepts and relations
    are printed, and the DOT output of utils.taxonomy_to_dot is passed to
    utils.dot_to_image under the name 'snake'.
    """
    reader = CorpusReader("corpora/snakes.corp")
    corpus_text = reader.get_corpus()

    # Term extraction first, then concept formation from those terms.
    found_terms = C_NC_TermExtractor(corpus_text).compute_cnc()
    concept_former = ConceptFormer()
    concept_former.form_concepts(found_terms)

    # Single-argument parenthesised print emits the same text as the
    # Python 2 print statement.
    print("Taxonomy: ")
    pprint(concept_former.get_taxonomy())

    # Collect a display name and the triples of every concept in one pass.
    concept_names = []
    triples = []
    for node in list(concept_former.get_taxonomy()):
        concept_names.append(" ".join(node.name))
        triples.extend(node.make_tripels())

    print("no con.: " + str(len(concept_names)))
    print("no rel.: " + str(len(triples)))

    dot_source = utils.taxonomy_to_dot(concept_names, triples)
    utils.dot_to_image(dot_source, 'snake')
# print i #raw_input() #s = "The Anaconda, or Water Boa, is one the world's largest snakes, when born they can be 3 feet (1m) long." #s = ' '.join(sys.argv[1:]) sentences = sent_tokenize(s) for s in sentences: count += 1 tree = pp.parse_sentence(s, parser) tree = tree[0] #tree = Tree('S', [Tree('NP', [Tree('NNP', ['Leon'])]), Tree('VP', [Tree('VBZ', ['hits']), Tree('NP', [Tree('NNP', ['Kai'])])]), Tree('.', ['.'])]) path = utils.get_knoex_path() dot_code = utils.nltk_tree_to_dot(tree) utils.dot_to_image(dot_code, 'temptree_' + str(count)) if show == 2: os.popen('gnome-open ' + 'temptree_' + str(count) + '.png') g, _ = match_tree(tree, pattern_dict) graph += g while ['', '', ''] in graph: graph.remove(['', '', '']) print graph dot_code = utils.list_of_tripels_to_dot_fancy(graph) utils.dot_to_image(dot_code, 'tempgraph') if show: os.popen('gnome-open ' + 'tempgraph.png')
import utils
import preprocessor as pp
from tree_combinations import numerate_non_terminals
import hearst_patterns

# Demo script: look for relations in a sample sentence, turn them into DOT
# code and hand that to utils.dot_to_image.
# Single-argument parenthesised prints emit the same text as the Python 2
# print statement; print('') stands in for the bare `print` blank line.

# Sample input and the base name used for the generated image.
file_name = 'LeonHitsKai'
text = 'Leon hits Kai.'

# Step 1: relation search on the raw text.
print('get relations by applying hearst patterns')
relations = hearst_patterns.find_realation(text)
print(relations)
print('')

# Step 2: relations -> DOT source.
print('generate dot code')
dot_code = utils.list_of_tripels_to_dot(relations)
print(dot_code)
print('')

# Step 3: DOT source -> image, named after the sample.
print('convert dot code to image')
utils.dot_to_image(dot_code, file_name + '_relations')
tree = pp.parse_sentence(s,'stat') #numerate_non_terminals(tree) dot_code = utils.nltk_tree_to_dot(tree) print dot_code utils.dot_to_image(dot_code, file_name + '_stat') print except : print 'cannot parse with stat' """ try: tree = pp.parse_sentence(s, 'stanford') #numerate_non_terminals(tree) tree = tree[0] dot_code = utils.nltk_tree_to_dot(tree) utils.dot_to_image(dot_code, file_name + '_stanford') #dot_code = utils.list_of_tripels_to_dot(dep) #utils.dot_to_image(dot_code, file_name + '_dep_stanford') print "stanford done" except: print 'cannot parse with stanford' """ try : tree = pp.parse_sentence(s,'berkeley') tree = tree[0] #numerate_non_terminals(tree) dot_code = utils.nltk_tree_to_dot(tree) utils.dot_to_image(dot_code, file_name + '_berkeley') print "berkeley done"
# print i #raw_input() #s = "The Anaconda, or Water Boa, is one the world's largest snakes, when born they can be 3 feet (1m) long." #s = ' '.join(sys.argv[1:]) sentences = sent_tokenize(s) for s in sentences: count+=1 tree = pp.parse_sentence(s,parser) tree = tree[0] #tree = Tree('S', [Tree('NP', [Tree('NNP', ['Leon'])]), Tree('VP', [Tree('VBZ', ['hits']), Tree('NP', [Tree('NNP', ['Kai'])])]), Tree('.', ['.'])]) path = utils.get_knoex_path() dot_code = utils.nltk_tree_to_dot(tree) utils.dot_to_image(dot_code, 'temptree_'+str(count)) if show == 2: os.popen('gnome-open ' + 'temptree_'+str(count)+'.png') g,_ = match_tree(tree, pattern_dict) graph += g while ['','',''] in graph: graph.remove(['','','']) print graph dot_code = utils.list_of_tripels_to_dot_fancy(graph) utils.dot_to_image(dot_code, 'tempgraph') if show : os.popen('gnome-open ' + 'tempgraph.png')
tree = pp.parse_sentence(s,'stat') #numerate_non_terminals(tree) dot_code = utils.nltk_tree_to_dot(tree) print dot_code utils.dot_to_image(dot_code, file_name + '_stat') print except : print 'cannot parse with stat' """ try : tree = pp.parse_sentence(s,'stanford') #numerate_non_terminals(tree) tree = tree[0] dot_code = utils.nltk_tree_to_dot(tree) utils.dot_to_image(dot_code, file_name + '_stanford') #dot_code = utils.list_of_tripels_to_dot(dep) #utils.dot_to_image(dot_code, file_name + '_dep_stanford') print "stanford done" except : print 'cannot parse with stanford' """ try : tree = pp.parse_sentence(s,'berkeley') tree = tree[0] #numerate_non_terminals(tree) dot_code = utils.nltk_tree_to_dot(tree)