def small_text():
    sentence1 = "Travel kills time."
    sentence2 = "France is a nice country."
    text = sentence1 + " " + sentence2

    # extract candidate terms, ranked by their C/NC-value
    term_extractor = C_NC_TermExtractor(text)
    terms = term_extractor.compute_cnc()

    # group the extracted terms into concepts
    former = ConceptFormer()
    former.form_concepts(terms)

    # add is-a relations found by the Hearst-pattern extractor
    tripels = list(RelationExtractor.find_realation(text))
    former.find_hearst_concepts(tripels)

    print "Taxonomy: "
    pprint(former.get_taxonomy())

    concepts, relations = [], []
    for concept in list(former.get_taxonomy()):
        concepts.append(" ".join(concept.name))
        relations += concept.make_tripels()

    print "no con.: " + str(len(concepts))
    print "no rel.: " + str(len(relations))

    utils.dot_to_image(utils.taxonomy_to_dot(concepts, relations), 'france')
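small_text relies on RelationExtractor.find_realation to pull is-a triples out of raw text via Hearst patterns. As a point of reference, a minimal sketch of that idea could look like the following; the regex, the helper name find_hearst_triples, and the triple layout are assumptions, not the project's code.

# A minimal, illustrative Hearst-pattern matcher; the single "X such as Y"
# regex and the helper name are assumptions, not the project's RelationExtractor.
import re

def find_hearst_triples(text):
    pattern = re.compile(r'(\w+) such as (\w+)', re.IGNORECASE)
    for hypernym, hyponym in pattern.findall(text):
        yield [hyponym, 'is-a', hypernym]

print(list(find_hearst_triples("Reptiles such as snakes are cold-blooded.")))
# -> [['snakes', 'is-a', 'Reptiles']]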
def long_text():
    c = CorpusReader("corpora/snakes.corp")
    text = c.get_corpus()

    term_extractor = C_NC_TermExtractor(text)
    terms = term_extractor.compute_cnc()

    former = ConceptFormer()
    former.form_concepts(terms)

    print "Taxonomy: "
    pprint(former.get_taxonomy())

    concepts, relations = [], []
    for concept in list(former.get_taxonomy()):
        concepts.append(" ".join(concept.name))
        relations += concept.make_tripels()

    print "no con.: " + str(len(concepts))
    print "no rel.: " + str(len(relations))

    utils.dot_to_image(utils.taxonomy_to_dot(concepts, relations), 'snake')
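long_text runs the same pipeline over a corpus file. Its term-extraction step, compute_cnc, refers to the C/NC-value method; the toy sketch below shows the C-value half of that scoring with made-up candidate data, following the published C-value definition (Frantzi et al.).

# Toy version of the C-value part of C/NC-value term scoring; the candidate
# table and helper names are illustrative, not the project's C_NC_TermExtractor.
import math

def is_nested(term, other):
    # True if `term` occurs as a contiguous word sequence inside `other`
    n = len(term)
    return any(other[i:i + n] == term for i in range(len(other) - n + 1))

def c_value(candidates):
    # candidates: dict mapping a multi-word term (tuple of words) to its frequency
    scores = {}
    for term, freq in candidates.items():
        longer = [f for other, f in candidates.items()
                  if len(other) > len(term) and is_nested(term, other)]
        if longer:
            scores[term] = math.log(len(term), 2) * (freq - sum(longer) / float(len(longer)))
        else:
            scores[term] = math.log(len(term), 2) * freq
    return scores

print(c_value({('water', 'boa'): 3, ('largest', 'water', 'boa'): 1}))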
Example #5
        #    print i
        #raw_input()

        #s = "The Anaconda, or Water Boa, is one the world's largest snakes, when born they can be 3 feet (1m) long."
        #s = ' '.join(sys.argv[1:])

        sentences = sent_tokenize(s)
        for s in sentences:
            count += 1
            tree = pp.parse_sentence(s, parser)
            tree = tree[0]
            #tree = Tree('S', [Tree('NP', [Tree('NNP', ['Leon'])]), Tree('VP', [Tree('VBZ', ['hits']), Tree('NP', [Tree('NNP', ['Kai'])])]), Tree('.', ['.'])])

            path = utils.get_knoex_path()
            dot_code = utils.nltk_tree_to_dot(tree)
            utils.dot_to_image(dot_code, 'temptree_' + str(count))
            if show == 2:
                os.popen('gnome-open ' + 'temptree_' + str(count) + '.png')

            g, _ = match_tree(tree, pattern_dict)
            graph += g

        while ['', '', ''] in graph:
            graph.remove(['', '', ''])
        print graph

        dot_code = utils.list_of_tripels_to_dot_fancy(graph)
        utils.dot_to_image(dot_code, 'tempgraph')

        if show:
            os.popen('gnome-open ' + 'tempgraph.png')
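The loop above parses each sentence and hands the tree to match_tree with a pattern dictionary to collect relation triples. Using the commented-out example tree, a rough idea of what such matching can yield is sketched below; it only handles the flat S -> NP VP shape and is not the project's match_tree / pattern_dict machinery.

# Rough sketch of reading a (subject, verb, object) triple off a parse tree;
# illustrative only, not the project's pattern matching.
from nltk import Tree

def svo_triple(tree):
    subj = verb = obj = ''
    for child in tree:
        if child.label() == 'NP':
            subj = ' '.join(child.leaves())
        elif child.label() == 'VP':
            for vp_child in child:
                if vp_child.label().startswith('VB'):
                    verb = ' '.join(vp_child.leaves())
                elif vp_child.label() == 'NP':
                    obj = ' '.join(vp_child.leaves())
    return [subj, verb, obj]

tree = Tree('S', [Tree('NP', [Tree('NNP', ['Leon'])]),
                  Tree('VP', [Tree('VBZ', ['hits']),
                              Tree('NP', [Tree('NNP', ['Kai'])])]),
                  Tree('.', ['.'])])
print(svo_triple(tree))   # ['Leon', 'hits', 'Kai']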
import utils
import preprocessor as pp
from tree_combinations import numerate_non_terminals
import hearst_patterns
file_name = 'LeonHitsKai'
text = 'Leon hits Kai.'

print 'get relations by applying hearst patterns'
relations = hearst_patterns.find_realation(text)
print relations
print
print 'generate dot code'
dot_code = utils.list_of_tripels_to_dot(relations)
print dot_code
print
print 'convert dot code to image'
utils.dot_to_image(dot_code, file_name + '_relations')
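utils.list_of_tripels_to_dot and utils.dot_to_image are used throughout these snippets but not shown. Assuming Graphviz is installed and `dot` is on the PATH, a plausible minimal dot_to_image writes the DOT source to disk and shells out to render it; the names and file layout here are illustrative.

# Plausible minimal rendering helper; not the project's utils.dot_to_image.
import subprocess

def dot_to_image(dot_code, file_name):
    dot_file = file_name + '.dot'
    png_file = file_name + '.png'
    with open(dot_file, 'w') as f:
        f.write(dot_code)
    # Graphviz: render the .dot source as a PNG
    subprocess.call(['dot', '-Tpng', dot_file, '-o', png_file])
    return png_file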




    tree = pp.parse_sentence(s,'stat')
    #numerate_non_terminals(tree)
    dot_code = utils.nltk_tree_to_dot(tree)
    print dot_code
    utils.dot_to_image(dot_code, file_name + '_stat')
    print
except :
    print 'cannot parse with stat'
"""

try:
    tree = pp.parse_sentence(s, 'stanford')
    #numerate_non_terminals(tree)
    tree = tree[0]
    dot_code = utils.nltk_tree_to_dot(tree)
    utils.dot_to_image(dot_code, file_name + '_stanford')

    #dot_code = utils.list_of_tripels_to_dot(dep)
    #utils.dot_to_image(dot_code, file_name + '_dep_stanford')

    print "stanford done"
except:
    print 'cannot parse with stanford'
"""
try :
    tree = pp.parse_sentence(s,'berkeley')
    tree = tree[0]
    #numerate_non_terminals(tree)
    dot_code = utils.nltk_tree_to_dot(tree)
    utils.dot_to_image(dot_code, file_name + '_berkeley')
    print "berkeley done"