def save_tree(tree, d, name, postscript=False, **kwargs):
    """Render an NLTK tree to a PostScript or annotated PDF file.

    Args:
        tree: the tree to draw. NOTE(review): when ``d`` is given only
            ``tree[0]`` is used (via ``get_drawable_tree``) — confirm callers
            pass a tuple in that case.
        d: optional data forwarded to ``get_drawable_tree``; ``None`` draws
            the tree as-is.
        name: output file name without extension.
        postscript: if True, keep the raw ``<name>.ps`` file; otherwise the
            rendering goes through a temporary PNG and is saved as
            ``<name>.pdf`` with the ``kwargs`` drawn as text lines above it.
        **kwargs: key/value annotations written at the top of the PDF.

    Returns:
        The produced file name: ``<name>.ps`` or ``<name>.pdf``.

    Bug fix: the original removed the intermediate file unconditionally, so
    in ``postscript=True`` mode it deleted the .ps it had just created and
    returned the name of a nonexistent file. The removal now only happens
    for the temporary PNG on the PDF path.
    """
    drawable_tree = tree if d is None else get_drawable_tree(tree[0], d)
    extension = 'ps' if postscript else 'png'
    cf = CanvasFrame()
    tc = TreeWidget(cf.canvas(), drawable_tree)
    tc['node_font'] = 'arial 22 bold'
    tc['leaf_font'] = 'arial 22'
    tc['node_color'] = '#005990'
    tc['leaf_color'] = '#3F8F57'
    tc['line_color'] = '#175252'
    tc['xspace'] = 20
    tc['yspace'] = 20
    # Reserve one 40px row per annotation above the tree.
    curr_y = 40 * len(kwargs)
    cf.add_widget(tc, 0, curr_y)
    out_file = '{0}.{1}'.format(name, extension)
    cf.print_to_file(out_file)
    if not postscript:
        im1 = Image.open(out_file)
        im1 = im1.convert('RGB')
        curr_y = 10
        # Loop-invariant: load the font and create the draw object once.
        font = ImageFont.truetype("/fonts/Ubuntu-L.ttf", 24)
        draw = ImageDraw.Draw(im1)
        for key in kwargs:
            val = kwargs[key]
            format_str = '{0}'
            if isinstance(val, float):
                format_str = format_str + '={1:.4f}'
            else:
                format_str = format_str + '= {1}'
            draw.text((10, curr_y), format_str.format(key, val), (0, 0, 0), font)
            curr_y = curr_y + 40
        im1.save('{}.pdf'.format(name))
    cf.destroy()
    if not postscript:
        # Only the temporary PNG is disposable; a requested .ps must survive.
        os.remove(out_file)
    return '{0}.{1}'.format(name, 'ps' if postscript else 'pdf')
def quicktree(sentence):
    """Parse a sentence and return a visual representation in IPython.

    Renders the parse via NLTK's canvas to tree.ps, converts it to
    tree.png with ImageMagick (through IPython's shell when available,
    otherwise through subprocess), and returns an IPython Image of it.

    Fixes: the original had an unreachable ``os.remove("tree.png")`` after
    the return, could leave ``have_ipython`` unbound if the probe raised
    nothing, and swallowed all exceptions on the IPython import.
    """
    import os
    from nltk import Tree
    from nltk.draw.util import CanvasFrame
    from nltk.draw import TreeWidget
    from stat_parser import Parser
    try:
        from IPython.display import display
        from IPython.display import Image
    except ImportError:
        # IPython is optional; display/Image simply stay undefined.
        pass
    # Detect whether we are running inside IPython: get_ipython() only
    # exists there, raising NameError otherwise.
    try:
        get_ipython()
        have_ipython = True
    except NameError:
        import subprocess
        have_ipython = False
    parser = Parser()
    parsed = parser.parse(sentence)
    cf = CanvasFrame()
    tc = TreeWidget(cf.canvas(), parsed)
    cf.add_widget(tc, 10, 10)  # (10,10) offsets
    cf.print_to_file('tree.ps')
    cf.destroy()
    if have_ipython:
        tregex_command = 'convert tree.ps tree.png'
        result = get_ipython().getoutput(tregex_command)
    else:
        tregex_command = ["convert", "tree.ps", "tree.png"]
        result = subprocess.check_output(tregex_command)
    os.remove("tree.ps")
    return Image(filename='tree.png')
def draw_tree_to_file(tree, path):
    """Render *tree* into the file at *path* using NLTK's canvas."""
    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), tree)
    frame.add_widget(widget, 0, 0)  # anchored at the canvas origin
    frame.print_to_file(path)
    frame.destroy()
def save_tree(tree):
    """Draw *tree* to tree.ps and convert it to output.png with ImageMagick."""
    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), tree)
    frame.add_widget(widget, 10, 10)  # (10,10) offsets
    frame.print_to_file('tree.ps')
    frame.destroy()
    os.system('convert tree.ps output.png')
def quicktree(sentence):
    """Parse a sentence and return a visual representation in IPython.

    Renders the parse via NLTK's canvas to tree.ps, converts it to
    tree.png with ImageMagick (through IPython's shell when available,
    otherwise through subprocess), and returns an IPython Image of it.

    Fixes: the original had an unreachable ``os.remove("tree.png")`` after
    the return, could leave ``have_ipython`` unbound if the probe raised
    nothing, and swallowed all exceptions on the IPython import.
    """
    import os
    from nltk import Tree
    from nltk.draw.util import CanvasFrame
    from nltk.draw import TreeWidget
    from stat_parser import Parser
    try:
        from IPython.display import display
        from IPython.display import Image
    except ImportError:
        # IPython is optional; display/Image simply stay undefined.
        pass
    # Detect whether we are running inside IPython: get_ipython() only
    # exists there, raising NameError otherwise.
    try:
        get_ipython()
        have_ipython = True
    except NameError:
        import subprocess
        have_ipython = False
    parser = Parser()
    parsed = parser.parse(sentence)
    cf = CanvasFrame()
    tc = TreeWidget(cf.canvas(), parsed)
    cf.add_widget(tc, 10, 10)  # (10,10) offsets
    cf.print_to_file('tree.ps')
    cf.destroy()
    if have_ipython:
        tregex_command = 'convert tree.ps tree.png'
        result = get_ipython().getoutput(tregex_command)
    else:
        tregex_command = ["convert", "tree.ps", "tree.png"]
        result = subprocess.check_output(tregex_command)
    os.remove("tree.ps")
    return Image(filename='tree.png')
def format(sentence):
    """Dependency-parse *sentence* with the Stanford parser.

    Locates stanford-parser.jar and a models jar via the ``locate``
    command, draws the dependency tree to tree.ps, and returns the
    (dependency graph, token list) pair for the first parse.
    """
    raw = subprocess.Popen(['locate', 'stanford-parser.jar'],
                           stdout=subprocess.PIPE).communicate()[0]
    parser_jar = raw.decode().strip()
    raw = subprocess.Popen(['locate', 'models.jar'],
                           stdout=subprocess.PIPE).communicate()[0]
    candidates = raw.decode().strip().split('\n')
    # Keep the last candidate path that mentions 'parse'.
    for candidate in candidates:
        if 'parse' in candidate:
            models_jar = candidate
    dependency_parser = StanfordDependencyParser(
        path_to_jar=parser_jar, path_to_models_jar=models_jar)
    tokens = word_tokenize(sentence)
    result = dependency_parser.raw_parse(sentence)
    for dep in result:
        frame = CanvasFrame()
        widget = TreeWidget(frame.canvas(), dep.tree())
        frame.add_widget(widget, 10, 10)  # (10,10) offsets
        frame.print_to_file('tree.ps')
        frame.destroy()
        return (dep, tokens)
def jupyter_draw_nltk_tree(tree: Tree = None, directory: str = '/tmp', f_name: str = 'tmp', show_tree: bool = False):
    """Render an NLTK tree to a PNG file, optionally displaying it inline.

    Args:
        tree: the tree to draw.
        directory: directory for the output files (created if missing).
        f_name: base file name (without extension).
        show_tree: if True, display the PNG via IPython's ``display``.

    Returns:
        The path of the generated .png file (as a POSIX string).

    Bug fix: the cleanup previously ran ``rm {f_name}.ps`` instead of
    removing the actual intermediate file ``ps_file_name``; the two differ
    whenever ``f_name`` carries a suffix, leaving the .ps behind.
    """
    f_name = Path(directory) / f_name
    f_name.parent.mkdir(exist_ok=True, parents=True)
    cf = CanvasFrame()
    tc = TreeWidget(cf.canvas(), tree)
    tc['node_font'] = 'arial 13 bold'
    tc['leaf_font'] = 'arial 14'
    tc['node_color'] = '#005990'
    tc['leaf_color'] = '#3F8F57'
    tc['line_color'] = '#175252'
    cf.add_widget(tc, 20, 20)
    ps_file_name = f_name.with_suffix('.ps').as_posix()
    cf.print_to_file(ps_file_name)
    cf.destroy()
    png_file_name = f_name.with_suffix(".png").as_posix()
    system(f'convert {ps_file_name} {png_file_name}')
    if show_tree:
        display((Image(filename=png_file_name), ))
    # Remove exactly the intermediate PostScript file we created above.
    system(f'rm {ps_file_name}')
    return png_file_name
def draw_to_file(tree):
    """Render *tree* into the PostScript file 'tree_plot.ps'."""
    file_name = 'tree_plot.ps'
    canvas = CanvasFrame()
    tree_canvas = TreeWidget(canvas.canvas(), tree)
    canvas.add_widget(tree_canvas, 10, 10)  # small offset from the origin
    canvas.print_to_file(file_name)
    canvas.destroy()
def draw_nltk_tree(tree):
    """Draw *tree* into 'tmp_tree_output.ps' using the shared color scheme."""
    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), tree)
    # Apply the house styling in one pass.
    for option, value in (('node_font', 'arial 15 bold'),
                          ('leaf_font', 'arial 15'),
                          ('node_color', '#005990'),
                          ('leaf_color', '#3F8F57'),
                          ('line_color', '#175252')):
        widget[option] = value
    frame.add_widget(widget, 50, 50)
    frame.print_to_file('tmp_tree_output.ps')
    frame.destroy()
def drawrst(strtree, fname):
    """Render the bracketed RST tree string *strtree* into *fname*.

    A '.ps' suffix is appended to *fname* when missing.
    """
    if not fname.endswith(".ps"):
        fname = fname + ".ps"
    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), Tree.fromstring(strtree))
    frame.add_widget(widget, 10, 10)  # (10,10) offsets
    frame.print_to_file(fname)
    frame.destroy()
def save_tree_with_ps(str_tree, save_path):
    """Render *str_tree* and save the drawing to '<save_path>.ps'."""
    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), str_tree)
    # Shared styling for node/leaf fonts and colors.
    for option, value in (('node_font', 'arial 14 bold'),
                          ('leaf_font', 'arial 14'),
                          ('node_color', '#005990'),
                          ('leaf_color', '#3F8F57'),
                          ('line_color', '#175252')):
        widget[option] = value
    frame.add_widget(widget, 10, 10)  # (10,10) offsets
    frame.print_to_file('{}.ps'.format(save_path))
    frame.destroy()
def draw_rst(self, fname):
    """Render this object's parse as a PostScript file named *fname*.

    A '.ps' suffix is appended to *fname* when missing.
    """
    parse_str = self.get_parse()
    if not fname.endswith(".ps"):
        fname = fname + ".ps"
    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), Tree.fromstring(parse_str))
    frame.add_widget(widget, 10, 10)  # (10,10) offsets
    frame.print_to_file(fname)
    frame.destroy()
def drawTreeAndSaveImage(s, i, dir):
    """Parse sentence *s* and save each parse under *dir* as a PNG.

    Files are numbered starting at i+1; the intermediate .ps files are
    converted with ImageMagick's ``convert`` and left in place.
    """
    parsed_sent = parser.raw_parse(s)
    for line in parsed_sent:
        frame = CanvasFrame()
        widget = TreeWidget(frame.canvas(), Tree.fromstring(str(line)))
        frame.add_widget(widget, 10, 10)
        i += 1
        base = dir + str(i)
        frame.print_to_file(base + '.ps')
        os.system('convert ' + base + '.ps' + ' ' + base + '.png')
        frame.destroy()
def draw_nltk_tree(tree):
    """Render *tree* as a PNG (via ImageMagick) and display it inline."""
    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), tree)
    # Shared styling for node/leaf fonts and colors.
    for option, value in (('node_font', 'arial 15 bold'),
                          ('leaf_font', 'arial 15'),
                          ('node_color', '#005990'),
                          ('leaf_color', '#3F8F57'),
                          ('line_color', '#175252')):
        widget[option] = value
    frame.add_widget(widget, 50, 50)
    frame.print_to_file('tmp_tree_output.ps')
    frame.destroy()
    os.system('convert tmp_tree_output.ps tmp_tree_output.png')
    display(Image(filename='tmp_tree_output.png'))
def quicktree(sentence):
    """Parse a sentence and return a visual representation"""
    from nltk import Tree
    from nltk.draw.util import CanvasFrame
    from nltk.draw import TreeWidget
    from stat_parser import Parser
    from IPython.display import display
    from IPython.display import Image
    parsed = Parser().parse(sentence)
    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), parsed)
    frame.add_widget(widget, 10, 10)  # (10,10) offsets
    frame.print_to_file('tree.ps')
    frame.destroy()
def save_tree(str_tree, save_path):
    """Render *str_tree* and save the resulting image to *save_path*.

    The tree is first drawn to a temporary tmp.ps, converted with
    ImageMagick, and the temporary file is then removed.
    """
    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), str_tree)
    # Shared styling for node/leaf fonts and colors.
    for option, value in (('node_font', 'arial 14 bold'),
                          ('leaf_font', 'arial 14'),
                          ('node_color', '#005990'),
                          ('leaf_color', '#3F8F57'),
                          ('line_color', '#175252')):
        widget[option] = value
    frame.add_widget(widget, 10, 10)  # (10,10) offsets
    frame.print_to_file('tmp.ps')
    frame.destroy()
    # Convert with the ImageMagick tool, then drop the temporary file.
    lake.shell.run('convert tmp.ps %s' % save_path)
    lake.shell.run('rm tmp.ps')
def draw_nltk_tree(tree):
    # Borrowed from
    # https://stackoverflow.com/questions/31779707/how-do-you-make-nltk-draw-trees-that-are-inline-in-ipython-jupyter
    """Render *tree* inline in a notebook, cleaning up the temp files."""
    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), tree)
    # Shared styling for node/leaf fonts and colors.
    for option, value in (('node_font', 'arial 15 bold'),
                          ('leaf_font', 'arial 15'),
                          ('node_color', '#005990'),
                          ('leaf_color', '#3F8F57'),
                          ('line_color', '#175252')):
        widget[option] = value
    frame.add_widget(widget, 50, 50)
    frame.print_to_file('tmp_tree_output.ps')
    frame.destroy()
    os.system('convert tmp_tree_output.ps tmp_tree_output.png')
    display(Image(filename='tmp_tree_output.png'))
    os.system('rm tmp_tree_output.ps tmp_tree_output.png')
def main(args):
    """
    Subcommand main. You shouldn't need to call this yourself if you're
    using `config_argparser`
    """
    corpus = read_corpus(args)
    odir = get_output_dir(args)
    # One PostScript rendering per document in the corpus.
    for key, tree in corpus.items():
        cframe = CanvasFrame()
        cframe.add_widget(TreeWidget(cframe.canvas(), tree), 10, 10)
        target = fp.join(odir, key.doc) + '.ps'
        cframe.print_to_file(target)
        cframe.destroy()
    announce_output_dir(odir)
def display_tree(tree):
    """Write each parse in *tree* to its own numbered file.

    With NLTK available each parse is rendered as PostScript
    (tree1.ps, tree2.ps, ...); otherwise each parse is pretty-printed
    to a text file (tree1.txt, ...).

    Bug fix: ``pprint.pprint(t, fileName)`` passed a file *name* where
    pprint requires a writable stream, raising AttributeError at runtime;
    the file is now opened and the handle passed instead.
    """
    if nltk_is_available:
        count = 0
        for t in tree:
            cf = CanvasFrame()
            tc = TreeWidget(cf.canvas(), t)
            cf.add_widget(tc, 10, 10)
            count += 1
            fileName = "tree" + repr(count) + ".ps"
            cf.print_to_file(fileName)
            cf.destroy()
    else:
        count = 0
        for t in tree:
            count += 1
            fileName = "tree" + repr(count) + ".txt"
            with open(fileName, "w") as out:
                pprint.pprint(t, out)
def draw_trees(trees, name, reverse_dict=None, print_prob=False):
    """Render each tree in *trees* into its own file '<name><index>.ps'.

    reverse_dict: optional mapping used (via change_leaves) to rewrite
        the leaves of each main tree in place before drawing.
    print_prob: when True, additionally draw entry[1] (e.g. a probability
        tree) to the right of the main tree.
    """
    cell_width = 140
    cell_height = 200
    per_row = 1
    for index, entry in enumerate(trees):
        if reverse_dict:
            change_leaves(entry[0], reverse_dict)
        slot = 0
        frame = CanvasFrame()
        main_widget = TreeWidget(frame.canvas(), entry[0])
        x = (slot % per_row) * cell_width
        y = int(slot / per_row) * cell_height
        frame.add_widget(main_widget, x, y)
        if print_prob:
            prob_widget = TreeWidget(frame.canvas(), entry[1])
            frame.add_widget(prob_widget, x + cell_width, y)
        slot += 1
        frame.print_to_file('{0}{1}.ps'.format(name, index))
        frame.destroy()
def format(sentence, jar_location):
    """Dependency-parse *sentence* with the Stanford parser jars found
    under *jar_location*.

    Draws the first dependency tree to tree.ps and returns the
    (dependency graph, token list) pair.
    """
    dependency_parser = StanfordDependencyParser(
        path_to_jar=jar_location + '/stanford-parser.jar',
        path_to_models_jar=jar_location + '/stanford-parser-3.9.2-models.jar')
    tokens = word_tokenize(sentence)
    for dep in dependency_parser.raw_parse(sentence):
        frame = CanvasFrame()
        widget = TreeWidget(frame.canvas(), dep.tree())
        frame.add_widget(widget, 10, 10)
        frame.print_to_file('tree.ps')
        frame.destroy()
        return (dep, tokens)
def convertPsToPng(self, treeVal):
    """Draw the bracketed tree string *treeVal* to self.filenamePS and
    convert it to self.filenamePNG."""
    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), Tree.fromstring(treeVal))
    # Shared styling plus wider node spacing.
    for option, value in (('node_font', 'arial 13 bold'),
                          ('leaf_font', 'arial 11'),
                          ('node_color', '#005990'),
                          ('leaf_color', '#3F8F57'),
                          ('line_color', '#175252'),
                          ('xspace', 25),
                          ('yspace', 25)):
        widget[option] = value
    frame.add_widget(widget, 10, 10)  # (10,10) offsets
    frame.print_to_file(self.filenamePS)
    frame.destroy()
    # ImageMagick must be installed so that `convert` is available.
    os.system("convert %s %s" % (self.filenamePS, self.filenamePNG))
def draw_one_tree(strtree, draw_file):
    """Show or save a discourse tree alongside its EDU text.

    The user chooses interactively: answering 'a' opens an interactive
    window, anything else writes the drawing to *draw_file*.
    """
    frame = CanvasFrame()
    tree_widget = TreeWidget(frame.canvas(), Tree.fromstring(strtree), draggable=1)
    frame.add_widget(tree_widget, 1200, 0)  # (10,10) offsets
    # Load the EDU text that accompanies this tree.
    canvas = frame.canvas()
    edu_path = RAW_TXT + "/" + draw_file.split("/")[2].split(
        ".")[0] + ".out.edu"
    with open(edu_path, "r") as f:
        edu_text = f.read()
    text_widget = TextWidget(canvas, edu_text, draggable=1)
    frame.add_widget(text_widget, 1400, 0)
    user_choice = input("直接打印(a) or 存到文件(b): ")
    if user_choice == "a":
        frame.mainloop()
    else:
        frame.print_to_file(draw_file)
        frame.destroy()
def save_image(parsed, index: str) -> Binary:
    """Render *parsed* to '<trees_path>/tree_<index>.png' and return the
    PNG bytes wrapped in Binary.

    The intermediate PostScript file is deleted after conversion.
    """
    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), parsed, xspace=40, yspace=40)
    for option, value in (('node_font', 'arial 20 bold'),
                          ('leaf_font', 'arial 20 bold'),
                          ('node_color', '#005990'),
                          ('leaf_color', '#3F8F57'),
                          ('line_color', '#175252')):
        widget[option] = value
    frame.add_widget(widget, 50, 50)
    frame.print_to_file(os.path.join(trees_path, f'tree_{index}.ps'))
    frame.destroy()
    os.system(f'convert {trees_path}/tree_{index}.ps '
              f'{trees_path}/tree_{index}.png')
    os.remove(f'{trees_path}/tree_{index}.ps')
    with open(f'{trees_path}/tree_{index}.png', 'rb') as tree_img:
        return Binary(tree_img.read())
def build_syntax_tree(txt):
    # Tokenize *txt* into sentences, chunk each with the module-level
    # `grammar`, render the chunk tree to '<path>tree.ps', convert it to
    # PNG with ImageMagick, and return the opened PNG file object.
    sentences = nltk.sent_tokenize(txt)
    for sent in sentences:
        tsent = pos_tag_sentence(sent, 'tree')
        ch = nltk.RegexpParser(grammar)
        tree = ch.parse(tsent)
        cf = CanvasFrame()
        tc = TreeWidget(cf.canvas(), tree)
        tc['node_font'] = 'arial 14 bold'
        tc['leaf_font'] = 'arial 14'
        tc['node_color'] = '#005990'
        tc['leaf_color'] = '#3F8F57'
        tc['line_color'] = '#175252'
        cf.add_widget(tc, 10, 10)
        cf.print_to_file(path + 'tree.ps')
        cf.destroy()
        os.system('convert {0}tree.ps {0}tree.png'.format(path))
        # NOTE(review): returning inside the loop means only the FIRST
        # sentence is ever rendered -- confirm this is intentional; the
        # original (whitespace-mangled) source is ambiguous here.
        return open(path + 'tree.png', 'rb')
def make_tree_png(sentence, file_number, sentence_number, outputDirectory):
    """Create a PNG syntax-tree image from a bracket-notation sentence.

    sentence: bracket notation of the sentence
    file_number: file the sentence originates from
    sentence_number: number of the sentence
    outputDirectory: directory for the files (has to exist)
    """
    filename = 'parse{}_sentence{}'.format(file_number, sentence_number)
    # Build and print the syntax tree using nltk.
    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), Tree.fromstring(sentence))
    frame.add_widget(widget, 10, 10)  # (10,10) offsets
    frame.print_to_file('./{}/{}.ps'.format(outputDirectory, filename))
    frame.destroy()
    # Convert the .ps file to a usable (non-transparent) png image.
    filepath = "./{}/{}".format(outputDirectory, filename)
    os.system('convert {}.ps {}.png'.format(filepath, filepath))  # to png
    os.system('convert -flatten {}.png {}.png'.format(filepath, filepath))  # white background
    # NOTE(review): 'del' is a Windows shell command while 'convert' is
    # typically ImageMagick on Unix -- confirm the intended platform.
    os.system('del ".\{}\{}.ps"'.format(outputDirectory, filename))
def save_tree(sentence):
    # Python 2 code (print statements).
    # Render a CoreNLP parse of *sentence* as '<md5>.jpg' under `path`
    # and return [jpg path, dependencies, indexed dependencies]; on a
    # parse error return [None, None, None].
    name = hashlib.md5(sentence).hexdigest()
    result = loads(server.parse(sentence))
    # Skip rendering when the image already exists (cached by content hash).
    if os.path.exists(os.path.join(path, "{0}.jpg".format(name))):
        print "File exists"
    else:
        with cd(path):
            try:
                tree = result["sentences"][0]["parsetree"]
            except Exception as e:
                print "Error %s occurred while processing the sentence %s" % (
                    e, sentence)
                return [None, None, None]
            cf = CanvasFrame()
            t = nltk.tree.Tree.fromstring(tree)
            tc = TreeWidget(cf.canvas(), t)
            cf.add_widget(tc, 10, 10)  # (10,10) offsets
            cf.print_to_file('{0}.ps'.format(name))
            # ImageMagick conversion with density/gamma/quality tuning.
            subprocess.call([
                "convert", "-density", "100", "{0}.ps".format(name), "-resize",
                "100%", "-gamma", "2.2", "-quality", "92", "{0}.jpg".format(name)
            ])
            #subprocess.call(["convert", "{0}.ps".format(name), "{0}.jpg".format(name)])
            cf.destroy()
    return [
        "{0}/{1}.jpg".format(path, name),
        result["sentences"][0]["dependencies"],
        result["sentences"][0]["indexeddependencies"]
    ]
tv -> 'gives' | 'give' | 'gave' | 'giving' v -> 'chased' | 'chase' | 'needs' | 'need' | 'hates' | 'hate' | 'has' | 'have' | 'loves' | 'love' | 'kicks' | 'kick' | 'jumps' | 'jump' adj -> 'scary' | 'tall' | 'short' | 'blonde' | 'slim' | 'fat' adv -> 'quickly' | 'slowly' | 'independently' n -> 'food' | 'cat' | 'cats' | 'dog' | 'dogs' | 'book' | 'books' | 'feather' | 'feathers' | 'baby' | 'babies' | 'boy' | 'boys' | 'girl' | 'girls' | 'icecream' | 'icecreams' pn -> 'mary' | 'john' | 'tomy' det -> 'the' | 'a' | 'an' """) #Είσοδος λίστας με προτάσεις όπου κάθε πρόταση είναι μια λίστα από λέξεις από τον χρήστη input_sentences = input( "Takes as input a list of sentences and produces their syntax trees \nExample input [['the','dog','chased','the','cat'],['mary','loves','the','cats'],['the','dog','needs','food']]\nInput:" ) input_sentences = check_and_return_input(input_sentences) parser = nltk.ChartParser(groucho_grammar) results = [] #Για κάθε πρόταση που δόθηκε ως input for i in range(len(input_sentences)): #Γίνεται parse for tree in parser.parse(input_sentences[i]): print("Syntactic tree " + str(i + 1)) print(tree) results.append(tree) #Γραφική αναπαράσταση των συντακτικών δέντρων σε ένα canvas και αποθήκευση #σε .ps αρχεία cf = CanvasFrame() tc = TreeWidget(cf.canvas(), tree) cf.add_widget(tc, 10, 10) # (10,10) offsets cf.print_to_file("tree" + str(i) + ".ps") cf.destroy() print("Result =", results) write_file(results)
def demo(): """ A demonstration of the probabilistic parsers. The user is prompted to select which demo to run, and how many parses should be found; and then each parser is run on the same demo, and a summary of the results are displayed. """ import sys, time from nltk import tokenize from nltk.parse import ViterbiParser from nltk.grammar import toy_pcfg1, toy_pcfg2 from nltk.draw.tree import draw_trees from nltk import Tree from nltk.draw.util import CanvasFrame from nltk.draw import TreeWidget # Define two demos. Each demo has a sentence and a grammar. # demos = [('move the green sphere to the bottom left corner', learned_pcfg), # ('move the green ball over the red block', learned_pcfg), # ('take the green pyramid and put it in the top left corner', learned_pcfg), # ('put the green pyramid on the red block', learned_pcfg), # ('move the red cylinder and place it on top of the blue cylinder that is on top of a green cylinder', learned_pcfg),] # Ask the user which demo they want to use. # print() # for i in range(len(demos)): # print('%3s: %s' % (i+1, demos[i][0])) # print(' %r' % demos[i][1]) # print() # print('Which demo (%d-%d)? 
' % (1, len(demos)), end=' ') # try: # snum = int(sys.stdin.readline().strip())-1 # sent, grammar = demos[snum] # except: # print('Bad sentence number') # return max_scene = 1 if max_scene<10: sc = '0000'+str(max_scene) elif max_scene<100: sc = '000'+str(max_scene) elif max_scene<1000: sc = '00'+str(max_scene) elif max_scene<10000: sc = '0'+str(max_scene) g = 'grammar_'+sc+'.txt' learned_pcfg = load('/home/omari/Dropbox/robot_modified/AR/grammar/'+g) grammar = learned_pcfg file1 = open('/home/omari/Dropbox/robot_modified/AR/hypotheses/matched_commands.txt', 'r') g1 = [i for i in file1.readlines()] for line in g1: line = unicode(line,encoding='utf-8') sent = line.split('\n')[0].split('-')[-1] scene = line.split('\n')[0].split('-')[0] sent_num = line.split('\n')[0].split('-')[1] print(line) if scene == '239' and sent_num == '0': continue # Tokenize the sentence. tokens = sent.split() parser = ViterbiParser(grammar) all_parses = {} # print('\nsent: %s\nparser: %s\ngrammar: %s' % (sent,parser,grammar)) parser.trace(3) parses = parser.parse_all(tokens) average = (reduce(lambda a,b:a+b.prob(), parses, 0)/len(parses) if parses else 0) num_parses = len(parses) for p in parses: all_parses[p.freeze()] = 1 # Print some summary statistics # print() # print('Time (secs) # Parses Average P(parse)') # print('-----------------------------------------') # print('%11.4f%11d%19.14f' % (time, num_parses, average)) parses = all_parses.keys() if parses: p = reduce(lambda a,b:a+b.prob(), parses, 0)/len(parses) else: p = 0 # print('------------------------------------------') # print('%11s%11d%19.14f' % ('n/a', len(parses), p)) # Ask the user if we should draw the parses. # print() # print('Draw parses (y/n)? 
', end=' ') # if sys.stdin.readline().strip().lower().startswith('y'): # print(' please wait...') # draw_trees(*parses) cf = CanvasFrame() # t = Tree(parses) t = Tree.fromstring('(S (CH_POS_PREPOST move) (PRE_POST (PRE (the the) (_entity (F_HSV green) (F_SHAPE sphere))) (PREPOST_connect (to to) (the the)) (POST (_F_POS (F_POS (_bottom_left (bottom bottom) (left left)))) (corner corner))))') tc = TreeWidget(cf.canvas(), t, draggable=1, node_font=('helvetica', -14), leaf_font=('helvetica', -12), roof_fill='white', roof_color='black', leaf_color='green4', node_color='blue4') cf.add_widget(tc,10,10) # tc = TreeWidget(cf.canvas(),t) # cf.add_widget(tc,10,10) # (10,10) offsets cf.print_to_file('/home/omari/Dropbox/robot_modified/trees/scene-'+scene+'-'+sent_num+'.ps') cf.destroy()
{<NOUN><IN><ADJ><NOUN>} {<ADJ><NOUN><IN><NOUN>} {<NOUN><IN><NOUN>} # 名词+介词+名词 ADV_ADJ:{<RB><ADJ>} # 副词+形容词 ADJ_PREP_NOUN:{<ADJ><IN><NOUN>} # 形容词+介词+名词 """ cp = nltk.RegexpParser(grammar) if len(sys.argv) < 2: sys.exit(0) for s in sys.argv[1:]: print('*****************************') print(s) tags = nltk.pos_tag(nltk.word_tokenize(s)) tree = nltk.chunk.ne_chunk(tags) print(str(tree)) cf = CanvasFrame() tc = TreeWidget(cf.canvas(), tree) cf.add_widget(tc, 10, 10) cf.print_to_file(s+'.1.ps') cf.destroy() tree = cp.parse(tags) print(str(tree)) cf = CanvasFrame() tc = TreeWidget(cf.canvas(), tree) cf.add_widget(tc, 10, 10) cf.print_to_file(s+'.2.ps')
def format(jar_location): path_to_jar = jar_location + '/stanford-parser.jar' path_to_models_jar = jar_location + '/stanford-parser-3.9.2-models.jar' sentence = input("Enter a sentence : ") dependency_parser = StanfordDependencyParser( path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar) tokens = word_tokenize(sentence) print(" ======== TOKENS =======") print(tokens) result = dependency_parser.raw_parse(sentence) print("\n") for dep in result: cf = CanvasFrame() t = dep.tree() tc = TreeWidget(cf.canvas(), t) cf.add_widget(tc, 10, 10) cf.print_to_file('tree.ps') cf.destroy() triples = dep.triples() parsed_tree = dep.to_conll(10) lines = [] line = [] print(parsed_tree) tree = parsed_tree.split("\n") for i in range(0, len(tree)): line = tree[i].split("\t") lines.append(line) lines = [line for line in lines if len(line) == 10] verbs = {} print("========= Verbs in Sentence ==========") for line in lines: if line[3][0] == "V": print(" Word : ", line[1]) print(" Tag: ", line[3]) verbs[int(line[0])] = line[3] if (len(verbs) >= 2): print("Sentence : \"", sentence, "\" is complex!") else: print("Sentence : ", sentence, " is simple!") exit() print("=======================================\n") dict_tree, dep_tree = create_dict(lines) word_dep = {} # Find all the dependencies of the verbs for i in range(0, int(lines[-1][0]) + 1): word_dep[i] = get_children(i, dict_tree) print("============== Word Dependencies =============") print(word_dep) # for verb in verbs.keys(): # if len(word_dep[verb]) > 0: # child_nodes = word_dep[verb] # else: # continue # new_children = [] # for child in child_nodes: # new_children.extend(word_dep[child]) # word_dep[verb].extend(new_children) # for verb in verbs.keys(): # word_dep[verb] = sorted(word_dep[verb]) # print(word_dep[verb]) # print("-------------------------------------") # verb_index = list(verbs.keys()) # for i in range(0,len(verb_index)): # for j in range(i+1, len(verb_index)): # if lines[j][4][0] != 'V': # print(" POS TAG: 
",lines[j][4]) # word_dep[verb_index[i]] = list(set(word_dep[verb_index[i]]) - set(word_dep[verb_index[j]])) # for verb in verbs.keys(): # print(word_dep[verb]) # print("-------------------------------------") clause_start = [] triples_list = [] print("=============== TRIPLES =============") for triple in triples: w1, rel, w2 = triple print(rel, w1, w2) if rel == 'nsubj' or rel == 'nsubjpass': clause_start.append(triple) triples_list.append(triple) print("\n\n\n") clause_words = {} for clause in clause_start: w1, rel, w2 = clause print("Clause : ", clause) clause_words[clause] = [] for triple in triples_list: word1, reln, word2 = triple # print("Triple for Clause: ", reln, word1, word2) if cmp(triple, clause) == False: if word1 == w1 or word1 == w2 and (reln != 'nsubj' and reln != 'nsubjpass'): clause_words[clause].append(triple) clause_words[clause].extend( find_all_deps(triple, triples_list)) elif word2 == w1 or word2 == w2 and (reln != 'nsubj' and reln != 'nsubjpass'): clause_words[clause].append(triple) clause_words[clause].extend( find_all_deps(triple, triples_list, [])) # for clause in clause_words: # clause_list = clause_words[clause] # for i in range(0,len(clause_list)): # print("Dep: ",dep) # dep_list = find_all_deps(tuple(clause_list[i]), triples_list) # for dep in dep_list: # if dep not in clause_list: # clause_list.append(dep) print("\n\n============= Clause Boundary ==============") for clause in clause_words: print("Clause: ", clause) words = [] for clause_list in clause_words[clause]: w1, rel, w2 = clause_list words.append(w1[0]) words.append(w2[0]) words = set(words) word_ind = [] for i in range(len(tokens)): if tokens[i] in words: word_ind.append(i) print(sorted(word_ind)) word_ind = sorted(word_ind) add_list = [] for w in word_ind: word_ind.extend(list(word_dep[w])) word_ind = sorted(set(word_ind)) print(word_ind) print("=======================================")
def save_tree(name, tree):
    """Write a rendering of *tree* to the file '<name>.jpg'.

    NOTE(review): CanvasFrame.print_to_file emits PostScript regardless of
    the file extension, so the '.jpg' file contains PS data -- confirm
    downstream consumers expect that.
    """
    frame = CanvasFrame()
    widget = TreeWidget(frame.canvas(), tree)
    frame.add_widget(widget, 10, 10)  # (10,10) offsets
    frame.print_to_file(name + ".jpg")
    frame.destroy()
def generate_all_trees():
    """Enumerate products of candidate parse trees for the sample strings,
    learn a grammar from each combination, score its generalization, keep
    the NUM_TO_KEEP best solutions, plot all scores, and draw the kept
    tree sets into 'trees<i>.ps' files.

    Fixes: the best-score list was named ``min`` (shadowing the builtin),
    a no-op ``strings = [s for s in strings]`` copy was dropped, the inner
    loop variable no longer clobbers ``t = len(terminals)``, and dead
    commented-out code was removed.
    """
    NUM_TO_KEEP = 10
    # Sample terminal strings; s1, s2, s4 and s5 feed the search below.
    s1 = [1, 2, 3, 4, 5]
    s2 = [3, 4, 5, 1, 2]
    s3 = [1, 2, 3, 4]
    s4 = [3, 4, 5]
    s5 = [1, 2]
    kx = []
    ky = []
    best_scores = []  # renamed from 'min', which shadowed the builtin
    strings = [s1, s2, s4, s5]
    curr_sol = []
    max_len = -1
    # Collect the distinct terminal symbols across all strings.
    terminals = set(sum(strings, []))
    t = len(terminals)
    for t1, t2, t3, t4 in product(
            *[generate_trees(s, max_len=max_len) for s in strings]):
        trees = [t1, t2, t3, t4]
        teacher = SimpleTeacher()
        total_nodes = sum(count_inner_nodes(tr) for tr in trees)
        for tree in trees:
            teacher.addPositiveExample(tree)
        c = learn(teacher, {})
        p, nt = measure_generalization(trees, c)
        kx.append(nt)
        ky.append(p)
        # Seed the kept-solution pool until it is full...
        if len(best_scores) < NUM_TO_KEEP:
            best_scores.append(p)
            curr_sol.append((c, tuple(trees)))
        # ...then replace the first kept solution this one matches or beats.
        if any(p >= m for m in best_scores):
            for i, m in enumerate(best_scores):
                if p >= m:
                    curr_sol[i] = (c, tuple(trees))
                    best_scores[i] = p
                    break
    print(sum(best_scores) / len(best_scores))
    print(curr_sol[0])
    plt.scatter(kx, ky)
    plt.show()
    # Draw each kept solution's trees on a 4-per-row grid.
    for ind, sol in enumerate(curr_sol):
        print('p')
        in_row = 4
        len_one = 140
        height = 200
        i = 0
        cf = CanvasFrame()
        for tr in sol[1]:
            tc = TreeWidget(cf.canvas(), tr)
            x, y = (i % in_row) * len_one, int(i / in_row) * height
            print(x, y)
            cf.add_widget(tc, x, y)
            i = i + 1
        cf.print_to_file('trees' + str(ind) + '.ps')
        cf.destroy()