def small_text():
    """Run the taxonomy pipeline on a two-sentence sample and render it.

    The sample text is passed through ``C_NC_TermExtractor`` to obtain
    terms, which a ``ConceptFormer`` turns into concepts.  The triples
    produced by ``RelationExtractor.find_realation`` are then handed to
    ``find_hearst_concepts``.  The taxonomy is pretty-printed, the number
    of concepts and relations is reported, and the dot representation is
    passed to ``utils.dot_to_image`` under the name ``'france'``.
    """
    text = " ".join(("Travel kills time.", "France is a nice country."))

    terms = C_NC_TermExtractor(text).compute_cnc()

    former = ConceptFormer()
    former.form_concepts(terms)
    # The triples are materialised before being handed over.
    former.find_hearst_concepts(list(RelationExtractor.find_realation(text)))

    # Single-argument print(...) is equivalent to the print statement.
    print("Taxonomy: ")
    pprint(former.get_taxonomy())

    concepts = []
    relations = []
    # Iterate over a snapshot of the taxonomy.
    for concept in list(former.get_taxonomy()):
        concepts.append(" ".join(concept.name))
        relations.extend(concept.make_tripels())

    print("no con.: " + str(len(concepts)))
    print("no rel.: " + str(len(relations)))

    dot_code = utils.taxonomy_to_dot(concepts, relations)
    utils.dot_to_image(dot_code, 'france')
# Demo script: find relations in a single sentence, pass them to the
# concept former and print what comes back.
import concept_former as cf
import term
import hearst_patterns as hp

# NOTE(review): the class is spelled `conceptFormer` (lower-case c) here --
# confirm this matches the name defined in concept_former.
former = cf.conceptFormer()

text = 'The monkey bites the snake.'
pattern = hp.find_realation(text)
result = former.find_hearst_concepts(pattern)

# Single-argument print(...) is equivalent to the print statement.
print(result)

# One line per concept: the concept followed by its relations.
for concept in list(result):
    line = str(concept) + ' ' + str(concept.get_relations())
    print(line)
# Demo script: find relations in one sentence, convert them to dot code
# and render that dot code to an image, printing each intermediate result.
import utils
import preprocessor as pp
from tree_combinations import numerate_non_terminals
import hearst_patterns

file_name = 'LeonHitsKai'
text = 'Leon hits Kai.'

# Step 1: relation extraction.
# Single-argument print(...) is equivalent to the print statement, and
# print('') emits the same blank line as a bare `print`.
print('get relations by applying hearst patterns')
relations = hearst_patterns.find_realation(text)
print(relations)
print('')

# Step 2: dot source for the relations.
print('generate dot code')
dot_code = utils.list_of_tripels_to_dot(relations)
print(dot_code)
print('')

# Step 3: hand the dot source to the image converter.
print('convert dot code to image')
utils.dot_to_image(dot_code, file_name + '_relations')
# Demo script: find relations in a few sample sentences, print them and,
# when the `dot` executable can be located, render them as an image.
import corpus
import hearst_patterns as hp
import utils

file_name = 'simple'

# Alternative input: analyse the whole corpus instead of the inline sample.
# text = corpus.CorpusReader().get_corpus()
text = 'Leon hits Kai. Marry f***s John. Kai greets Marry. John greets Leon.'

# The relations are walked twice below (once to print them, once to build
# the dot code).  Materialise them first so that the second pass still sees
# every relation even if find_realation hands back a one-shot iterator.
relations = list(hp.find_realation(text))

# print('') emits the same blank line as a bare `print` statement.
print('')
for r in relations:
    print(r)

# Rendering is only attempted when utils.which reports a `dot` executable.
if utils.which('dot'):
    dot_code = utils.list_of_tripels_to_dot(relations)
    utils.dot_to_image(dot_code, file_name + '_relations')
else:
    print("didn't find dot")