示例#1
0
def save_tree(tree, d, name, postscript=False, **kwargs):
    """Render a tree to ``<name>.ps`` or to an annotated ``<name>.pdf``.

    Args:
        tree: The tree to draw. When ``d`` is not ``None`` only ``tree[0]``
            is used, after being passed through ``get_drawable_tree``.
        d: Optional second argument for ``get_drawable_tree``; ``None``
            draws ``tree`` unchanged.
        name: Output path without extension.
        postscript: If true, keep the raw canvas dump as ``<name>.ps``.
            Otherwise the dump is opened with PIL, annotated and saved as
            ``<name>.pdf``, and the intermediate file is deleted.
        **kwargs: ``key=value`` captions drawn in the top-left corner, one
            per 40-pixel row. Floats are printed with four decimals.

    Returns:
        The path of the file that was produced.
    """
    drawable_tree = tree if d is None else get_drawable_tree(tree[0], d)
    extension = 'ps' if postscript else 'png'
    canvas_path = '{0}.{1}'.format(name, extension)

    cf = CanvasFrame()
    try:
        tc = TreeWidget(cf.canvas(), drawable_tree)
        tc['node_font'] = 'arial 22 bold'
        tc['leaf_font'] = 'arial 22'
        tc['node_color'] = '#005990'
        tc['leaf_color'] = '#3F8F57'
        tc['line_color'] = '#175252'
        tc['xspace'] = 20
        tc['yspace'] = 20
        # Leave one 40px row above the tree for every caption.
        cf.add_widget(tc, 0, 40 * len(kwargs))
        cf.print_to_file(canvas_path)
    finally:
        # Release the Tk window on every path; previously it was only
        # destroyed when postscript was False.
        cf.destroy()

    if postscript:
        return canvas_path

    im1 = Image.open(canvas_path)
    im1 = im1.convert('RGB')
    if kwargs:
        # Loop-invariant objects: build them once instead of per caption.
        font = ImageFont.truetype("/fonts/Ubuntu-L.ttf", 24)
        draw = ImageDraw.Draw(im1)
        curr_y = 10
        for key, val in kwargs.items():
            if isinstance(val, float):
                format_str = '{0}={1:.4f}'
            else:
                format_str = '{0}=  {1}'
            draw.text((10, curr_y), format_str.format(key, val), (0, 0, 0),
                      font)
            curr_y += 40
    im1.save('{}.pdf'.format(name))
    os.remove(canvas_path)
    return '{0}.{1}'.format(name, 'pdf')
示例#2
0
def quicktree(sentence):
    """Parse a sentence and return a visual representation in IPython.

    The parse tree is drawn to ``tree.ps`` and converted to ``tree.png``
    with the external ``convert`` command; ``tree.ps`` is removed afterwards
    and ``tree.png`` is left on disk.

    Args:
        sentence: The sentence handed to ``stat_parser.Parser.parse``.

    Returns:
        An ``IPython.display.Image`` built from ``tree.png``.

    Raises:
        ImportError: If IPython, nltk or stat_parser is not installed.
    """
    import os
    import subprocess
    from nltk.draw.util import CanvasFrame
    from nltk.draw import TreeWidget
    from stat_parser import Parser
    # Fail before doing any work: the result type needs IPython, and a
    # swallowed import error used to surface only as a NameError at return.
    from IPython.display import Image

    # get_ipython is only injected into builtins inside an IPython session.
    try:
        shell = get_ipython()
    except NameError:
        shell = None

    parsed = Parser().parse(sentence)
    cf = CanvasFrame()
    try:
        tc = TreeWidget(cf.canvas(), parsed)
        cf.add_widget(tc, 10, 10)  # (10,10) offsets
        cf.print_to_file('tree.ps')
    finally:
        cf.destroy()

    if shell is not None:
        shell.getoutput('convert tree.ps tree.png')
    else:
        subprocess.check_output(["convert", "tree.ps", "tree.png"])
    os.remove("tree.ps")
    # The old os.remove("tree.png") after this return was unreachable, so
    # the PNG has always stayed on disk; that behaviour is kept.
    return Image(filename='tree.png')
示例#3
0
def draw_tree_to_file(tree, path):
    """Draw ``tree`` at the canvas origin and dump the canvas to ``path``."""
    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), tree)
    frame.add_widget(widget, 0, 0)
    frame.print_to_file(path)
    frame.destroy()
示例#4
0
def save_tree(tree):
    """Draw ``tree`` to ``tree.ps`` and convert it to ``output.png``.

    The conversion runs the external ``convert`` command through the shell;
    its exit status is ignored.
    """
    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), tree)
    frame.add_widget(widget, 10, 10)  # 10px margin on both axes
    frame.print_to_file('tree.ps')
    frame.destroy()
    os.system('convert tree.ps output.png')
示例#5
0
def quicktree(sentence):
    """Parse a sentence and return a visual representation in IPython.

    The parse tree is drawn to ``tree.ps`` and converted to ``tree.png``
    with the external ``convert`` command; ``tree.ps`` is removed afterwards
    and ``tree.png`` is left on disk.

    Args:
        sentence: The sentence handed to ``stat_parser.Parser.parse``.

    Returns:
        An ``IPython.display.Image`` built from ``tree.png``.

    Raises:
        ImportError: If IPython, nltk or stat_parser is not installed.
    """
    import os
    import subprocess
    from nltk.draw.util import CanvasFrame
    from nltk.draw import TreeWidget
    from stat_parser import Parser
    # Fail before doing any work: the result type needs IPython, and a
    # swallowed import error used to surface only as a NameError at return.
    from IPython.display import Image

    # get_ipython is only injected into builtins inside an IPython session.
    try:
        shell = get_ipython()
    except NameError:
        shell = None

    parsed = Parser().parse(sentence)
    cf = CanvasFrame()
    try:
        tc = TreeWidget(cf.canvas(), parsed)
        cf.add_widget(tc, 10, 10)  # (10,10) offsets
        cf.print_to_file('tree.ps')
    finally:
        cf.destroy()

    if shell is not None:
        shell.getoutput('convert tree.ps tree.png')
    else:
        subprocess.check_output(["convert", "tree.ps", "tree.png"])
    os.remove("tree.ps")
    # The old os.remove("tree.png") after this return was unreachable, so
    # the PNG has always stayed on disk; that behaviour is kept.
    return Image(filename='tree.png')
示例#6
0
def format(sentence):
    """Dependency-parse ``sentence`` and draw its first parse to ``tree.ps``.

    The Stanford parser jars are discovered with the ``locate`` command.

    Args:
        sentence: Raw sentence text.

    Returns:
        ``(dep, tokens)`` for the first parse, where ``tokens`` comes from
        ``word_tokenize(sentence)``; ``None`` if the parser yields nothing.

    Raises:
        FileNotFoundError: If ``locate models.jar`` returns no path that
            contains ``'parse'``.
    """
    def _locate(filename):
        # Stripped, decoded stdout of `locate <filename>`.
        raw = subprocess.Popen(
            ['locate', filename], stdout=subprocess.PIPE).communicate()[0]
        return raw.decode().strip()

    # NOTE(review): if locate prints several matches this is a multi-line
    # string, exactly as before - confirm a single install is expected.
    path_to_jar = _locate('stanford-parser.jar')

    # Keep the last hit whose path mentions 'parse' (same choice as before).
    path_to_models_jar = None
    for candidate in _locate('models.jar').split('\n'):
        if 'parse' in candidate:
            path_to_models_jar = candidate
    if path_to_models_jar is None:
        # Previously this surfaced as an UnboundLocalError further down.
        raise FileNotFoundError(
            "locate found no parser models jar matching 'models.jar'")

    dependency_parser = StanfordDependencyParser(
        path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
    tokens = word_tokenize(sentence)
    result = dependency_parser.raw_parse(sentence)

    for dep in result:
        cf = CanvasFrame()
        try:
            tc = TreeWidget(cf.canvas(), dep.tree())
            cf.add_widget(tc, 10, 10)  # (10,10) offsets
            cf.print_to_file('tree.ps')
        finally:
            cf.destroy()
        # Only the first parse is rendered and returned.
        return (dep, tokens)
    return None
def jupyter_draw_nltk_tree(tree: Tree = None,
                           directory: str = '/tmp',
                           f_name: str = 'tmp',
                           show_tree: bool = False):
    """Render ``tree`` to a PNG file and optionally display it inline.

    The tree is first printed to ``<directory>/<f_name>.ps`` and then
    converted with the external ``convert`` command; the PostScript file is
    removed afterwards.

    Args:
        tree: Tree handed to ``TreeWidget``.
        directory: Output directory; created if missing.
        f_name: Output file name; its suffix, if any, is replaced.
        show_tree: If true, the PNG is passed to ``display``.

    Returns:
        The POSIX path of the PNG file.
    """
    f_name = Path(directory) / f_name
    f_name.parent.mkdir(exist_ok=True, parents=True)
    ps_file_name = f_name.with_suffix('.ps').as_posix()
    png_file_name = f_name.with_suffix(".png").as_posix()

    cf = CanvasFrame()
    tc = TreeWidget(cf.canvas(), tree)
    tc['node_font'] = 'arial 13 bold'
    tc['leaf_font'] = 'arial 14'
    tc['node_color'] = '#005990'
    tc['leaf_color'] = '#3F8F57'
    tc['line_color'] = '#175252'

    cf.add_widget(tc, 20, 20)
    cf.print_to_file(ps_file_name)
    cf.destroy()

    system(f'convert {ps_file_name} {png_file_name}')

    if show_tree:
        display((Image(filename=png_file_name), ))

    # Remove the file that was actually written. The old f'{f_name}.ps'
    # pointed elsewhere whenever f_name contained a dot, because
    # with_suffix replaces an existing suffix instead of appending.
    system(f'rm {ps_file_name}')

    return png_file_name
def draw_to_file(tree):
    """Draw ``tree`` with a 10px margin and print it to ``tree_plot.ps``."""
    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), tree)
    frame.add_widget(widget, 10, 10)
    frame.print_to_file('tree_plot.ps')
    frame.destroy()
def draw_nltk_tree(tree):
    """Draw a styled ``tree`` and print it to ``tmp_tree_output.ps``."""
    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), tree)
    # Fonts and colours, applied in a fixed order.
    for option, value in (
            ('node_font', 'arial 15 bold'),
            ('leaf_font', 'arial 15'),
            ('node_color', '#005990'),
            ('leaf_color', '#3F8F57'),
            ('line_color', '#175252'),
    ):
        widget[option] = value
    frame.add_widget(widget, 50, 50)
    frame.print_to_file('tmp_tree_output.ps')
    frame.destroy()
示例#10
0
def drawrst(strtree, fname):
    """Parse the bracketed tree ``strtree`` and draw it into a ``.ps`` file.

    ``.ps`` is appended to ``fname`` when it is not already the suffix.
    """
    target = fname if fname.endswith(".ps") else fname + ".ps"
    frame = CanvasFrame()
    parsed = Tree.fromstring(strtree)
    widget = TreeWidget(frame.canvas(), parsed)
    frame.add_widget(widget, 10, 10)  # 10px margin on both axes
    frame.print_to_file(target)
    frame.destroy()
示例#11
0
def save_tree_with_ps(str_tree, save_path):
    """Write a styled drawing of ``str_tree`` to ``<save_path>.ps``."""
    # Render the visualisation.
    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), str_tree)
    for option, value in (
            ('node_font', 'arial 14 bold'),
            ('leaf_font', 'arial 14'),
            ('node_color', '#005990'),
            ('leaf_color', '#3F8F57'),
            ('line_color', '#175252'),
    ):
        widget[option] = value
    frame.add_widget(widget, 10, 10)  # 10px margin on both axes
    frame.print_to_file('{}.ps'.format(save_path))
    frame.destroy()
示例#12
0
 def draw_rst(self, fname):
     """Draw the tree returned by ``self.get_parse()`` into a ``.ps`` file.

     ``.ps`` is appended to ``fname`` when it is not already the suffix.
     """
     bracketed = self.get_parse()
     target = fname if fname.endswith(".ps") else fname + ".ps"
     frame = CanvasFrame()
     parsed = Tree.fromstring(bracketed)
     widget = TreeWidget(frame.canvas(), parsed)
     frame.add_widget(widget, 10, 10)  # 10px margin on both axes
     frame.print_to_file(target)
     frame.destroy()
示例#13
0
def drawTreeAndSaveImage(s, i, dir):
    """Draw every parse of ``s`` to ``<dir><n>.ps`` and ``<dir><n>.png``.

    ``n`` starts at ``i + 1`` and grows by one per parse. Parsing uses the
    module-level ``parser``; the PNG is produced by the external ``convert``
    command.
    """
    for parse in parser.raw_parse(s):
        frame = CanvasFrame()
        parsed = Tree.fromstring(str(parse))
        widget = TreeWidget(frame.canvas(), parsed)
        frame.add_widget(widget, 10, 10)
        i += 1
        stem = dir + str(i)
        ps_name = stem + '.ps'
        png_name = stem + '.png'
        frame.print_to_file(ps_name)
        os.system('convert ' + ps_name + ' ' + png_name)
        frame.destroy()
 def draw_nltk_tree(tree):
     """Draw a styled ``tree``, convert it to PNG and display the image.

     Leaves ``tmp_tree_output.ps`` and ``tmp_tree_output.png`` on disk.
     """
     frame = CanvasFrame()
     widget = TreeWidget(frame.canvas(), tree)
     for option, value in (
             ('node_font', 'arial 15 bold'),
             ('leaf_font', 'arial 15'),
             ('node_color', '#005990'),
             ('leaf_color', '#3F8F57'),
             ('line_color', '#175252'),
     ):
         widget[option] = value
     frame.add_widget(widget, 50, 50)
     frame.print_to_file('tmp_tree_output.ps')
     frame.destroy()
     os.system('convert tmp_tree_output.ps tmp_tree_output.png')
     display(Image(filename='tmp_tree_output.png'))
def quicktree(sentence):
    """Parse ``sentence`` and print its tree drawing to ``tree.ps``.

    Nothing is returned; the PostScript file is the only output.
    """
    from nltk import Tree
    from nltk.draw.util import CanvasFrame
    from nltk.draw import TreeWidget
    from stat_parser import Parser
    from IPython.display import display
    from IPython.display import Image

    parse_tree = Parser().parse(sentence)
    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), parse_tree)
    frame.add_widget(widget, 10, 10)  # 10px margin on both axes
    frame.print_to_file('tree.ps')
    frame.destroy()
示例#16
0
def save_tree(str_tree, save_path):
    """Draw ``str_tree`` and convert the drawing to ``save_path``.

    The tree is printed to the scratch file ``tmp.ps``, converted with
    ImageMagick's ``convert`` and the scratch file is then removed, all
    through ``lake.shell.run``.
    """
    # Render the visualisation.
    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), str_tree)
    for option, value in (
            ('node_font', 'arial 14 bold'),
            ('leaf_font', 'arial 14'),
            ('node_color', '#005990'),
            ('leaf_color', '#3F8F57'),
            ('line_color', '#175252'),
    ):
        widget[option] = value
    frame.add_widget(widget, 10, 10)  # 10px margin on both axes
    frame.print_to_file('tmp.ps')
    frame.destroy()

    # Convert with ImageMagick, then drop the scratch file.
    convert_command = 'convert tmp.ps %s' % save_path
    lake.shell.run(convert_command)
    lake.shell.run('rm tmp.ps')
def draw_nltk_tree(tree):
    """Display a styled drawing of ``tree`` inline, leaving no files behind.

    Adapted from
    https://stackoverflow.com/questions/31779707/how-do-you-make-nltk-draw-trees-that-are-inline-in-ipython-jupyter
    """
    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), tree)
    for option, value in (
            ('node_font', 'arial 15 bold'),
            ('leaf_font', 'arial 15'),
            ('node_color', '#005990'),
            ('leaf_color', '#3F8F57'),
            ('line_color', '#175252'),
    ):
        widget[option] = value
    frame.add_widget(widget, 50, 50)
    frame.print_to_file('tmp_tree_output.ps')
    frame.destroy()

    # PostScript -> PNG -> inline display -> remove both scratch files.
    os.system('convert tmp_tree_output.ps tmp_tree_output.png')
    display(Image(filename='tmp_tree_output.png'))
    os.system('rm tmp_tree_output.ps tmp_tree_output.png')
示例#18
0
文件: draw.py 项目: eipiplusun/educe
def main(args):
    """
    Subcommand entry point: draw every corpus entry to a PostScript file.

    Each entry is written to ``<output dir>/<key.doc>.ps``. Normally invoked
    through `config_argparser` rather than called directly.
    """
    corpus = read_corpus(args)
    odir = get_output_dir(args)
    for key in corpus:
        frame = CanvasFrame()
        tree_widget = TreeWidget(frame.canvas(), corpus[key])
        frame.add_widget(tree_widget, 10, 10)
        target = fp.join(odir, key.doc) + '.ps'
        frame.print_to_file(target)
        frame.destroy()
    announce_output_dir(odir)
示例#19
0
def display_tree(tree):
    """Write every tree in ``tree`` to its own numbered file.

    When the module-level flag ``nltk_is_available`` is true each tree is
    drawn to ``tree<N>.ps``; otherwise it is pretty-printed to
    ``tree<N>.txt``. ``N`` starts at 1.

    Args:
        tree: Iterable of trees.
    """
    if nltk_is_available:
        for count, t in enumerate(tree, 1):
            cf = CanvasFrame()
            tc = TreeWidget(cf.canvas(), t)
            cf.add_widget(tc, 10, 10)
            fileName = "tree" + repr(count) + ".ps"
            cf.print_to_file(fileName)
            cf.destroy()
    else:
        for count, t in enumerate(tree, 1):
            fileName = "tree" + repr(count) + ".txt"
            # pprint's second argument is a writable stream, not a path;
            # passing the file name itself raised AttributeError.
            with open(fileName, "w") as stream:
                pprint.pprint(t, stream)
示例#20
0
def main(args):
    """
    Subcommand entry point: draw every corpus entry to a PostScript file.

    Each entry is written to ``<output dir>/<key.doc>.ps``. Normally invoked
    through `config_argparser` rather than called directly.
    """
    corpus = read_corpus(args)
    odir = get_output_dir(args)
    for key in corpus:
        frame = CanvasFrame()
        tree_widget = TreeWidget(frame.canvas(), corpus[key])
        frame.add_widget(tree_widget, 10, 10)
        target = fp.join(odir, key.doc) + '.ps'
        frame.print_to_file(target)
        frame.destroy()
    announce_output_dir(odir)
示例#21
0
def draw_trees(trees, name, reverse_dict=None, print_prob=False):
    """Draw each entry of ``trees`` to ``<name><index>.ps``.

    Every entry is indexed: item 0 is drawn at the canvas origin and, when
    ``print_prob`` is true, item 1 is drawn 140px to its right. A truthy
    ``reverse_dict`` is first applied to item 0 through ``change_leaves``.
    """
    second_column_x = 140
    for index, entry in enumerate(trees):
        if reverse_dict:
            change_leaves(entry[0], reverse_dict)
        frame = CanvasFrame()
        main_widget = TreeWidget(frame.canvas(), entry[0])
        frame.add_widget(main_widget, 0, 0)
        if print_prob:
            side_widget = TreeWidget(frame.canvas(), entry[1])
            frame.add_widget(side_widget, second_column_x, 0)
        frame.print_to_file('{0}{1}.ps'.format(name, index))
        frame.destroy()
示例#22
0
def format(sentence, jar_location):
    """Dependency-parse ``sentence`` and draw its first parse to ``tree.ps``.

    The parser and models jars are read from ``jar_location``. Returns
    ``(dep, tokens)`` for the first parse, or ``None`` when the parser
    yields nothing.
    """
    dependency_parser = StanfordDependencyParser(
        path_to_jar=jar_location + '/stanford-parser.jar',
        path_to_models_jar=jar_location + '/stanford-parser-3.9.2-models.jar')
    tokens = word_tokenize(sentence)

    for dep in dependency_parser.raw_parse(sentence):
        frame = CanvasFrame()
        dep_tree = dep.tree()
        widget = TreeWidget(frame.canvas(), dep_tree)
        frame.add_widget(widget, 10, 10)
        frame.print_to_file('tree.ps')
        frame.destroy()
        # Stop after the first parse.
        return (dep, tokens)
示例#23
0
    def convertPsToPng(self, treeVal):
        """Draw the bracketed tree ``treeVal`` and convert it to PNG.

        The drawing goes to ``self.filenamePS`` and is then converted to
        ``self.filenamePNG`` by the external ``convert`` command.
        """
        frame = CanvasFrame()

        parsed = Tree.fromstring(treeVal)
        widget = TreeWidget(frame.canvas(), parsed)
        for option, value in (
                ('node_font', 'arial 13 bold'),
                ('leaf_font', 'arial 11'),
                ('node_color', '#005990'),
                ('leaf_color', '#3F8F57'),
                ('line_color', '#175252'),
                ('xspace', 25),
                ('yspace', 25),
        ):
            widget[option] = value

        frame.add_widget(widget, 10, 10)  # 10px margin on both axes
        frame.print_to_file(self.filenamePS)
        frame.destroy()

        # ImageMagick must be installed, along with its `convert` tool.
        os.system("convert %s %s" % (self.filenamePS, self.filenamePNG))
示例#24
0
def draw_one_tree(strtree, draw_file):
    """Show or save a tree next to the text of its EDU file.

    The EDU text is read from ``RAW_TXT/<stem>.out.edu``, where ``<stem>``
    is the third ``/``-separated component of ``draw_file`` up to its first
    dot. The user is prompted: answering ``a`` opens an interactive window,
    anything else prints the canvas to ``draw_file``.
    """
    frame = CanvasFrame()
    parsed = Tree.fromstring(strtree)
    tree_widget = TreeWidget(frame.canvas(), parsed, draggable=1)
    frame.add_widget(tree_widget, 1200, 0)

    # EDU text shown beside the tree.
    canvas = frame.canvas()
    stem = draw_file.split("/")[2].split(".")[0]
    edu_path = RAW_TXT + "/" + stem + ".out.edu"
    with open(edu_path, "r") as edu_file:
        edu_text = edu_file.read()
    text_widget = TextWidget(canvas, edu_text, draggable=1)
    frame.add_widget(text_widget, 1400, 0)

    user_choice = input("直接打印(a) or 存到文件(b): ")
    if user_choice != "a":
        frame.print_to_file(draw_file)
        frame.destroy()
    else:
        frame.mainloop()
示例#25
0
def save_image(parsed, index: str) -> Binary:
    """Render ``parsed`` to ``tree_<index>.png`` and return its bytes.

    Files live under the module-level ``trees_path``. The intermediate
    ``.ps`` file is removed; the PNG stays on disk and its content is
    returned wrapped in ``Binary``.
    """
    ps_path = f'{trees_path}/tree_{index}.ps'
    png_path = f'{trees_path}/tree_{index}.png'

    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), parsed, xspace=40, yspace=40)
    for option, value in (
            ('node_font', 'arial 20 bold'),
            ('leaf_font', 'arial 20 bold'),
            ('node_color', '#005990'),
            ('leaf_color', '#3F8F57'),
            ('line_color', '#175252'),
    ):
        widget[option] = value
    frame.add_widget(widget, 50, 50)
    frame.print_to_file(os.path.join(trees_path, f'tree_{index}.ps'))
    frame.destroy()

    os.system(f'convert {ps_path} {png_path}')
    os.remove(ps_path)

    with open(png_path, 'rb') as png_file:
        payload = png_file.read()
    return Binary(payload)
示例#26
0
def build_syntax_tree(txt):
    """Chunk the first sentence of ``txt`` and return its drawing as a file.

    Only the first sentence is processed. Its tree is drawn to
    ``<path>tree.ps`` and converted to ``<path>tree.png``, which is returned
    opened in binary mode (the caller must close it). Returns ``None`` when
    ``txt`` contains no sentence. ``grammar`` and ``path`` are module-level
    names.
    """
    for sentence in nltk.sent_tokenize(txt):
        tagged = pos_tag_sentence(sentence, 'tree')
        chunker = nltk.RegexpParser(grammar)
        chunk_tree = chunker.parse(tagged)

        frame = CanvasFrame()
        widget = TreeWidget(frame.canvas(), chunk_tree)
        for option, value in (
                ('node_font', 'arial 14 bold'),
                ('leaf_font', 'arial 14'),
                ('node_color', '#005990'),
                ('leaf_color', '#3F8F57'),
                ('line_color', '#175252'),
        ):
            widget[option] = value
        frame.add_widget(widget, 10, 10)
        frame.print_to_file(path + 'tree.ps')
        frame.destroy()

        os.system('convert {0}tree.ps {0}tree.png'.format(path))

        # Stop after the first sentence.
        return open(path + 'tree.png', 'rb')
def make_tree_png(sentence, file_number, sentence_number, outputDirectory):
    '''
    Create a PNG image of the syntax tree described by ``sentence``.

    sentence: bracket notation of the sentence
    file_number: file the sentence originates from
    sentence_number: number of the sentence
    outputDirectory: directory for the files, relative to the working
        directory (has to exist)

    The image is written to
    ``./<outputDirectory>/parse<file_number>_sentence<sentence_number>.png``
    using the external ``convert`` command; the intermediate ``.ps`` file is
    deleted afterwards.
    '''
    filename = 'parse{}_sentence{}'.format(file_number, sentence_number)
    filepath = "./{}/{}".format(outputDirectory, filename)
    ps_path = filepath + '.ps'

    # make syntax tree using nltk
    cf = CanvasFrame()
    t = Tree.fromstring(sentence)
    tc = TreeWidget(cf.canvas(), t)
    cf.add_widget(tc, 10, 10)  # (10,10) offsets
    cf.print_to_file(ps_path)
    cf.destroy()

    # convert the ps file to a usable (non-transparent) png image
    os.system('convert {}.ps {}.png'.format(filepath, filepath))
    os.system('convert -flatten {}.png {}.png'.format(filepath, filepath))

    # Portable cleanup: the previous `del ".\...ps"` shell command only
    # worked on Windows and relied on invalid "\{" escape sequences.
    os.remove(ps_path)
示例#28
0
def save_tree(sentence):
    """Parse ``sentence`` remotely and cache a JPEG drawing of its tree.

    The image is stored as ``<path>/<md5 of sentence>.jpg`` and is only
    rendered when that file does not exist yet. ``server``, ``path`` and
    ``cd`` are module-level names.

    Args:
        sentence: Sentence text (``bytes`` or text).

    Returns:
        ``[image_path, dependencies, indexeddependencies]`` taken from the
        first parsed sentence, or ``[None, None, None]`` when the response
        contains no parse tree.
    """
    # hashlib only accepts bytes; encode text so this also runs on Python 3.
    digest_input = (sentence if isinstance(sentence, bytes)
                    else sentence.encode('utf-8'))
    name = hashlib.md5(digest_input).hexdigest()

    result = loads(server.parse(sentence))

    if os.path.exists(os.path.join(path, "{0}.jpg".format(name))):
        print("File exists")
    else:
        with cd(path):
            try:
                tree = result["sentences"][0]["parsetree"]
            except Exception as e:
                print("Error %s occurred while processing the sentence %s" % (
                    e, sentence))
                return [None, None, None]

            cf = CanvasFrame()
            try:
                t = nltk.tree.Tree.fromstring(tree)
                tc = TreeWidget(cf.canvas(), t)
                cf.add_widget(tc, 10, 10)  # (10,10) offsets
                cf.print_to_file('{0}.ps'.format(name))
                subprocess.call([
                    "convert", "-density", "100", "{0}.ps".format(name),
                    "-resize", "100%", "-gamma", "2.2", "-quality", "92",
                    "{0}.jpg".format(name)
                ])
            finally:
                # Release the Tk window even if rendering fails.
                cf.destroy()

    first_sentence = result["sentences"][0]
    return [
        "{0}/{1}.jpg".format(path, name),
        first_sentence["dependencies"],
        first_sentence["indexeddependencies"],
    ]
示例#29
0
 tv -> 'gives' | 'give' | 'gave' | 'giving'
 v -> 'chased' | 'chase' | 'needs' | 'need' | 'hates' | 'hate' | 'has' | 'have' | 'loves' | 'love' | 'kicks' | 'kick' | 'jumps' | 'jump'
 adj -> 'scary' | 'tall' | 'short' | 'blonde' | 'slim' | 'fat'
 adv -> 'quickly' | 'slowly' | 'independently'
 n -> 'food' | 'cat' | 'cats' | 'dog' | 'dogs' | 'book' | 'books' | 'feather' | 'feathers' | 'baby' | 'babies' | 'boy' | 'boys' | 'girl' | 'girls' | 'icecream' | 'icecreams'
 pn -> 'mary' | 'john' | 'tomy'
 det -> 'the' | 'a' | 'an' 
 """)
# Read from the user a list of sentences, each sentence being a list of words.
input_sentences = check_and_return_input(input(
    "Takes as input a list of sentences and produces their syntax trees \nExample input [['the','dog','chased','the','cat'],['mary','loves','the','cats'],['the','dog','needs','food']]\nInput:"
))
parser = nltk.ChartParser(groucho_grammar)
results = []
# Parse every sentence that was entered.
for i, sentence in enumerate(input_sentences):
    for tree in parser.parse(sentence):
        print("Syntactic tree " + str(i + 1))
        print(tree)
        results.append(tree)
        # Draw the syntax tree on a canvas and save it to a .ps file.
        # Several parses of one sentence share the same file name.
        cf = CanvasFrame()
        tc = TreeWidget(cf.canvas(), tree)
        cf.add_widget(tc, 10, 10)  # 10px margin on both axes
        cf.print_to_file("tree" + str(i) + ".ps")
        cf.destroy()
print("Result =", results)
write_file(results)
def demo():
    """
    Draw one PostScript tree file per line of ``matched_commands.txt``.

    The learned grammar for scene ``max_scene`` is loaded, and every line of
    the commands file (``<scene>-<sentence number>-...-<sentence>``) is
    parsed with a tracing ``ViterbiParser``. A tree is then printed to
    ``trees/scene-<scene>-<sentence number>.ps``.

    NOTE(review): the tree that gets drawn is a hard-coded example, not the
    parse of the current line - confirm whether that is intended.
    """
    import io
    from nltk.parse import ViterbiParser
    from nltk import Tree
    from nltk.draw.util import CanvasFrame
    from nltk.draw import TreeWidget

    max_scene = 1
    # Grammar files are numbered with five zero-padded digits. zfill matches
    # the old if/elif ladder and also covers values of 10000 and above, for
    # which `sc` used to be left undefined.
    sc = str(max_scene).zfill(5)

    g = 'grammar_'+sc+'.txt'
    grammar = load('/home/omari/Dropbox/robot_modified/AR/grammar/'+g)

    # One parser is enough: the grammar does not change between lines.
    parser = ViterbiParser(grammar)
    parser.trace(3)

    commands_path = '/home/omari/Dropbox/robot_modified/AR/hypotheses/matched_commands.txt'
    # io.open decodes to text on Python 2 and 3 alike (replacing the
    # Python 2-only unicode() call) and the handle is closed on exit.
    with io.open(commands_path, 'r', encoding='utf-8') as file1:
        lines = file1.readlines()

    for line in lines:
        print(line)
        fields = line.split('\n')[0].split('-')
        if len(fields) < 2:
            # Blank or malformed line: no scene / sentence number to use.
            continue
        scene = fields[0]
        sent_num = fields[1]
        sent = fields[-1]
        if scene == '239' and sent_num == '0':
            continue

        # Tokenize and parse the sentence. The result is not used for the
        # drawing below; the call is kept for its trace output.
        tokens = sent.split()
        parser.parse_all(tokens)

        cf = CanvasFrame()
        try:
            t = Tree.fromstring('(S  (CH_POS_PREPOST move)  (PRE_POST    (PRE      (the the)      (_entity (F_HSV green) (F_SHAPE sphere)))    (PREPOST_connect (to to) (the the))    (POST      (_F_POS (F_POS (_bottom_left (bottom bottom) (left left)))) (corner corner))))')

            tc = TreeWidget(cf.canvas(), t, draggable=1,
                            node_font=('helvetica', -14),
                            leaf_font=('helvetica', -12),
                            roof_fill='white', roof_color='black',
                            leaf_color='green4', node_color='blue4')
            cf.add_widget(tc, 10, 10)
            cf.print_to_file('/home/omari/Dropbox/robot_modified/trees/scene-'+scene+'-'+sent_num+'.ps')
        finally:
            # Release the Tk window even if drawing fails.
            cf.destroy()
{<NOUN><IN><ADJ><NOUN>}
{<ADJ><NOUN><IN><NOUN>}
{<NOUN><IN><NOUN>} # 名词+介词+名词
ADV_ADJ:{<RB><ADJ>} # 副词+形容词
ADJ_PREP_NOUN:{<ADJ><IN><NOUN>} # 形容词+介词+名词
"""

# Chunk parser built from the grammar defined above.
cp = nltk.RegexpParser(grammar)

# Nothing to do without at least one sentence on the command line.
if len(sys.argv) < 2:
    sys.exit(0)

# For every sentence argument, draw two trees:
#   <sentence>.1.ps - named-entity chunks from nltk.chunk.ne_chunk
#   <sentence>.2.ps - chunks produced by the custom grammar
for s in sys.argv[1:]:
    print('*****************************')
    print(s)
    tags = nltk.pos_tag(nltk.word_tokenize(s))
    tree = nltk.chunk.ne_chunk(tags)
    print(str(tree))
    cf = CanvasFrame()
    tc = TreeWidget(cf.canvas(), tree)
    cf.add_widget(tc, 10, 10)
    cf.print_to_file(s+'.1.ps')
    cf.destroy()
    tree = cp.parse(tags)

    print(str(tree))
    cf = CanvasFrame()
    tc = TreeWidget(cf.canvas(), tree)
    cf.add_widget(tc, 10, 10)
    cf.print_to_file(s+'.2.ps')
    # The second frame was never destroyed, leaving one Tk window open per
    # sentence.
    cf.destroy()
示例#32
0
def format(jar_location):
    """Read a sentence from stdin and print a clause-boundary analysis.

    Steps, in order:
      1. Dependency-parse the sentence with the Stanford jars found under
         ``jar_location`` and draw each parse to ``tree.ps``.
      2. Print the rows of the 10-column CoNLL output whose tag column
         starts with ``"V"``. The process exits if fewer than two such rows
         are found.
      3. Collect, per ``nsubj``/``nsubjpass`` triple, the triples related to
         it and print the resulting token indices.

    Everything is reported with ``print``; the function returns ``None``.
    ``create_dict``, ``get_children``, ``find_all_deps`` and ``cmp`` are
    not defined in this block.
    """

    path_to_jar = jar_location + '/stanford-parser.jar'
    path_to_models_jar = jar_location + '/stanford-parser-3.9.2-models.jar'
    sentence = input("Enter a sentence : ")
    dependency_parser = StanfordDependencyParser(
        path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
    tokens = word_tokenize(sentence)
    print(" ======== TOKENS =======")
    print(tokens)
    result = dependency_parser.raw_parse(sentence)
    print("\n")

    # Every parse overwrites the same tree.ps file.
    for dep in result:
        cf = CanvasFrame()
        t = dep.tree()
        tc = TreeWidget(cf.canvas(), t)
        cf.add_widget(tc, 10, 10)
        cf.print_to_file('tree.ps')
        cf.destroy()
    # `dep` is the last parse from the loop above; it is unbound (NameError)
    # when the parser returned nothing.
    triples = dep.triples()

    # Split the CoNLL text into rows of tab-separated columns.
    parsed_tree = dep.to_conll(10)
    lines = []
    line = []
    print(parsed_tree)
    tree = parsed_tree.split("\n")
    for i in range(0, len(tree)):
        line = tree[i].split("\t")
        lines.append(line)

    # Keep only complete rows (drops blank trailing lines).
    lines = [line for line in lines if len(line) == 10]
    verbs = {}
    print("=========  Verbs in Sentence ==========")
    for line in lines:
        # Column 3 holds the tag, column 1 the word, column 0 the index.
        if line[3][0] == "V":
            print(" Word : ", line[1])
            print(" Tag: ", line[3])
            verbs[int(line[0])] = line[3]

    if (len(verbs) >= 2):
        print("Sentence : \"", sentence, "\" is complex!")
    else:
        print("Sentence : ", sentence, " is simple!")
        # Terminates the whole process, not just this function.
        exit()
    print("=======================================\n")

    dict_tree, dep_tree = create_dict(lines)

    word_dep = {}
    # Find all the dependencies of the verbs
    for i in range(0, int(lines[-1][0]) + 1):
        word_dep[i] = get_children(i, dict_tree)

    print("============== Word Dependencies =============")
    print(word_dep)

    # for verb in verbs.keys():
    #     if len(word_dep[verb]) > 0:
    #         child_nodes = word_dep[verb]
    #     else:
    #         continue
    #     new_children = []
    #     for child in child_nodes:
    #         new_children.extend(word_dep[child])
    #     word_dep[verb].extend(new_children)

    # for verb in verbs.keys():
    #     word_dep[verb] = sorted(word_dep[verb])
    #     print(word_dep[verb])
    #     print("-------------------------------------")

    # verb_index = list(verbs.keys())
    # for i in range(0,len(verb_index)):
    #     for j in range(i+1, len(verb_index)):
    #         if lines[j][4][0] != 'V':
    #             print(" POS TAG: ",lines[j][4])
    #             word_dep[verb_index[i]] = list(set(word_dep[verb_index[i]]) - set(word_dep[verb_index[j]]))

    # for verb in verbs.keys():
    #     print(word_dep[verb])
    #     print("-------------------------------------")

    # Subject triples mark the start of a clause.
    clause_start = []
    triples_list = []
    print("=============== TRIPLES =============")
    for triple in triples:
        w1, rel, w2 = triple
        print(rel, w1, w2)
        if rel == 'nsubj' or rel == 'nsubjpass':
            clause_start.append(triple)
        triples_list.append(triple)

    print("\n\n\n")

    clause_words = {}
    for clause in clause_start:
        w1, rel, w2 = clause
        print("Clause : ", clause)
        clause_words[clause] = []
        for triple in triples_list:
            word1, reln, word2 = triple
            # print("Triple for Clause: ", reln, word1, word2)
            # NOTE(review): `cmp` is not a Python 3 builtin and is not
            # defined in this block - confirm where it comes from.
            if cmp(triple, clause) == False:
                # `and` binds tighter than `or`: the relation filter only
                # guards the `== w2` comparison, not the `== w1` one.
                if word1 == w1 or word1 == w2 and (reln != 'nsubj'
                                                   and reln != 'nsubjpass'):
                    clause_words[clause].append(triple)
                    clause_words[clause].extend(
                        find_all_deps(triple, triples_list))
                elif word2 == w1 or word2 == w2 and (reln != 'nsubj'
                                                     and reln != 'nsubjpass'):
                    clause_words[clause].append(triple)
                    # NOTE(review): called with an extra [] argument here
                    # but not in the branch above - confirm which is meant.
                    clause_words[clause].extend(
                        find_all_deps(triple, triples_list, []))

    # for clause in clause_words:
    #     clause_list = clause_words[clause]
    #     for i in range(0,len(clause_list)):
    #         print("Dep: ",dep)
    #         dep_list = find_all_deps(tuple(clause_list[i]), triples_list)
    #         for dep in dep_list:
    #             if dep not in clause_list:
    #                 clause_list.append(dep)

    print("\n\n============= Clause Boundary ==============")
    for clause in clause_words:
        print("Clause: ", clause)
        words = []
        for clause_list in clause_words[clause]:
            w1, rel, w2 = clause_list
            # Element 0 of each triple end is compared against tokens below.
            words.append(w1[0])
            words.append(w2[0])
        words = set(words)
        word_ind = []
        for i in range(len(tokens)):
            if tokens[i] in words:
                word_ind.append(i)
        print(sorted(word_ind))
        word_ind = sorted(word_ind)
        add_list = []
        # The list grows while it is being iterated, so entries appended
        # here are visited (and expanded) as well.
        for w in word_ind:
            word_ind.extend(list(word_dep[w]))
        word_ind = sorted(set(word_ind))
        print(word_ind)
        print("=======================================")
def save_tree(name, tree):
    """Draw ``tree`` and print the canvas to ``<name>.jpg``.

    NOTE(review): the sibling snippets treat ``print_to_file`` output as
    PostScript, so this file is probably not a real JPEG - confirm.
    """
    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), tree)
    frame.add_widget(widget, 10, 10)  # 10px margin on both axes
    frame.print_to_file(name + ".jpg")
    frame.destroy()
示例#34
0
def generate_all_trees():
    """Learn a grammar for every combination of candidate trees and plot it.

    Candidate trees are generated for four fixed integer strings. For each
    combination a grammar is learned and measured with
    ``measure_generalization``; up to ``NUM_TO_KEEP`` solutions are
    retained. The scores are scatter-plotted, and each retained solution is
    drawn to ``trees<index>.ps`` with four trees per row.
    """
    NUM_TO_KEEP = 10
    strings = [
        [1, 2, 3, 4, 5],
        [3, 4, 5, 1, 2],
        [3, 4, 5],
        [1, 2],
    ]
    max_len = -1

    nonterminal_counts = []
    scores = []
    kept_scores = []
    kept_solutions = []

    candidates = [generate_trees(seq, max_len=max_len) for seq in strings]
    for combo in product(*candidates):
        trees = list(combo)
        teacher = SimpleTeacher()
        # Computed but never read afterwards; kept because
        # count_inner_nodes is defined outside this block.
        total_nodes = sum([count_inner_nodes(one) for one in trees])
        for one in trees:
            teacher.addPositiveExample(one)
        learned = learn(teacher, {})
        score, nt = measure_generalization(trees, learned)
        nonterminal_counts.append(nt)
        scores.append(score)

        if len(kept_scores) < NUM_TO_KEEP:
            kept_scores.append(score)
            kept_solutions.append((learned, tuple(trees)))
        # Overwrite the first kept entry that this score matches or beats.
        slot = next(
            (pos for pos, kept in enumerate(kept_scores) if score >= kept),
            None)
        if slot is not None:
            kept_solutions[slot] = (learned, tuple(trees))
            kept_scores[slot] = score

    print(sum(kept_scores) / len(kept_scores))
    print(kept_solutions[0])
    plt.scatter(nonterminal_counts, scores)
    plt.show()

    in_row = 4
    len_one = 140
    height = 200
    for ind, sol in enumerate(kept_solutions):
        print('p')
        cf = CanvasFrame()
        for i, drawn in enumerate(sol[1]):
            tc = TreeWidget(cf.canvas(), drawn)
            x, y = (i % in_row) * len_one, int(i / in_row) * height
            print(x, y)
            cf.add_widget(tc, x, y)
        cf.print_to_file('trees' + str(ind) + '.ps')
        cf.destroy()