def __init__(self, text_list=None, amr_with_attributes=False, text='',
             alignments=None, var_to_sent=None, sent_index=0):
    """Build every derived structure of the AMR from its textual form.

    Parameters:
        text_list: the AMR, one text line per element (or, when
            ``amr_with_attributes`` is true, the already attributed nodes).
        amr_with_attributes: when false, ``add_attributes`` is run first to
            turn the raw lines into attributed nodes.
        text: the sentence(s) of the document; joined with spaces and split
            into ``self.split_text``.
        alignments: alignment strings handed to ``get_alignments``.
        var_to_sent: mapping variable -> list of sentence indices. When it
            is empty, every variable is assigned to ``[sent_index]``.
        sent_index: sentence index used to fill an empty ``var_to_sent``.
    """
    # The original signature used mutable defaults ([] / {}).  The default
    # dict was filled in place below, so it leaked from one instance into
    # the next.  Fresh containers are created per call instead; a dict that
    # the caller passes explicitly is still filled in place as before.
    if text_list is None:
        text_list = []
    if alignments is None:
        alignments = []
    if var_to_sent is None:
        var_to_sent = {}

    # If the 'amr' that we get doesn't have attributes, it is just text,
    # i.e. each element of the list is just a line of the text;
    # otherwise it already has all the attributes in the required form.
    self.text_list = text_list
    self.amr = self.text_list
    # mapping from 'variables' to indices in self.amr
    self.var_to_index = {}
    if not amr_with_attributes:
        # add attributes
        self.add_attributes()
    # add other attributes like 'variable_start_index'
    self.add_variable_info()

    # contains the edge label for every 'parent child' variable pair
    self.edges = {}
    self.connections = self.get_edge_info()
    self.get_var_to_index_mapping()
    # contains all the 'variables' in the list
    self.nodes = self.get_node_info()
    self.common_text = self.get_common_text_var_mapping()

    # get 'var_to_sent'
    if var_to_sent == {}:
        for key in self.var_to_index.keys():
            var_to_sent[key] = [sent_index]
    self.var_to_sent = var_to_sent

    self.alignments = None
    self.get_alignments(alignments)

    # Not updated while merging any 2 nodes
    self.get_sentence_boundaries_amr()
    self.get_text_index_to_var()

    self.directed_graph = Graph(connections=self.connections,
                                nodes=self.nodes,
                                edge_lables=self.edges,
                                var_to_sent=self.var_to_sent,
                                common_text=self.common_text,
                                text_index_to_var=self.text_index_to_var,
                                root=self.amr[0]['variable'])
    self.topological_order = self.directed_graph.topological_order

    # self.text is a list of sentences in case of a document AMR
    self.text = text
    self.split_text = (' '.join(self.text)).split()

    # get depth_dict
    self.depth_dict = {}
    self.get_depth_dict()
def get_sentence_reference_graph(self, ):
    """Build ``self.sentence_reference_graph``, a sentence-to-sentence graph.

    Two sentences are linked when the variable of a node in one sentence
    also occurs (per ``self.var_to_index``) at a node that
    ``node_index_to_sent_index`` places in a different sentence. Each link
    is stored in both directions, and ``weights`` counts, under the key
    ``'<sent_a> <sent_b>'``, how many times the pair was encountered.
    """
    self.get_sentence_boundaries_amr()
    sentence_connections = []
    weights = {}
    for index_node, node in enumerate(self.amr):
        current_sent_index = self.node_index_to_sent_index(index_node)
        if current_sent_index == -1:
            # node is outside every sentence range
            continue
        for location in self.var_to_index[node['variable']]:
            location_sent_index = self.node_index_to_sent_index(location)
            if location_sent_index == current_sent_index:
                continue
            forward_key = str(current_sent_index) + ' ' + str(location_sent_index)
            backward_key = str(location_sent_index) + ' ' + str(current_sent_index)
            # 'weights' gains a key exactly when the matching connection is
            # appended, so a dict lookup replaces the linear scan of the
            # connection list without changing the outcome.
            if forward_key not in weights:
                sentence_connections.append(
                    (current_sent_index, location_sent_index))
                sentence_connections.append(
                    (location_sent_index, current_sent_index))
                weights[forward_key] = 1
                weights[backward_key] = 1
            else:
                weights[forward_key] += 1
                weights[backward_key] += 1
    self.sentence_reference_graph = Graph(
        connections=sentence_connections,
        nodes=range(0, len(self.sentence_boundries)),
        weights=weights)
class GraphTest(unittest.TestCase):
    """Unit tests for ``Graph``: edge insertion, neighbours and path search."""

    def setUp(self):
        # every test starts from an empty graph
        self.test_graph = Graph()

    def test_init(self):
        """A new graph has an empty adjacency mapping."""
        self.assertEqual(self.test_graph.graph, {})

    def test_add_edge_when_nodes_exists(self):
        """Adding an edge that is already present does not duplicate it."""
        graph = self.test_graph
        graph.graph['a'] = ['b', 'c']
        graph.add_edge('a', 'b')
        neighbours = graph.graph['a']
        self.assertEqual(neighbours, ['b', 'c'])
        self.assertNotEqual(neighbours, ['b', 'c', 'b'])

    def test_add_edge_when_nodes_does_not_exitsts(self):
        """Adding an edge between unknown nodes creates the source entry."""
        graph = self.test_graph
        self.assertEqual(graph.graph, {})
        graph.add_edge('m', 'n')
        self.assertEqual(graph.graph, {'m': ['n']})

    def test_get_neighbours_for_node(self):
        """Neighbours are returned for known nodes, an empty list otherwise."""
        graph = self.test_graph
        graph.graph['a'] = ['b', 'c']
        graph.graph['b'] = ['a', 'c']
        self.assertEqual(graph.get_neighbours_for_node('a'), ['b', 'c'])
        self.assertEqual(graph.get_neighbours_for_node('m'), [])

    def test_find_path_loop(self):
        """A path is found even though the adjacency lists contain a cycle."""
        graph = self.test_graph
        graph.graph['m'] = ['n', 'b', 'c']
        graph.graph['n'] = ['o', 'm']
        graph.graph['o'] = ['p']
        graph.graph['p'] = ['m', 'z']
        self.assertTrue(graph.check_if_path_exists('m', 'p'))

    def test_find_path(self):
        """Path lookup on a graph without cycles."""
        graph = self.test_graph
        graph.graph['a'] = ['b']
        graph.graph['b'] = ['c', 'd', 'e']
        graph.graph['d'] = ['f']
        exists = graph.check_if_path_exists
        self.assertFalse(exists('c', 'c'))
        self.assertTrue(exists('b', 'f'))
        self.assertFalse(exists('d', 'b'))
def main():
    """Build a small sample graph and print ``route_between_nodes(g, 3, 5)``."""
    g = Graph()
    # Edges (1, 2), (2, 3) and (5, 2) are intentionally left out.
    sample_edges = (
        (0, 1),
        (0, 5),
        (3, 4),
        (3, 5),
        (4, 0),
        (5, 4),
    )
    for source, target in sample_edges:
        g.add_edge(source, target)
    print(route_between_nodes(g, 3, 5))
class AMR(object):
    """
    Class to handle the textual representation of AMRs

    The attributes associated with each AMR node are -
    'parent_index', 'children_list', 'depth', 'no_of_children',
    'child_number', 'text', 'variable', 'variable_start_index',
    'variable_end_index', 'common_text'

    Default Usage - Just pass in the AMR as 'text_list'
    """

    def __init__(self, text_list=None, amr_with_attributes=False, text='',
                 alignments=None, var_to_sent=None, sent_index=0):
        """Build every derived structure of the AMR from its textual form.

        Parameters:
            text_list: the AMR, one text line per element (or the already
                attributed nodes when ``amr_with_attributes`` is true).
            amr_with_attributes: when false, ``add_attributes`` is run first.
            text: sentence(s) of the document, joined with spaces and split
                into ``self.split_text``.
            alignments: alignment strings handed to ``get_alignments``.
            var_to_sent: mapping variable -> list of sentence indices; when
                empty, every variable is assigned to ``[sent_index]``.
            sent_index: sentence index used to fill an empty ``var_to_sent``.
        """
        # Mutable defaults were shared between calls; the default dict was
        # even filled in place, leaking one instance's variables into the
        # next.  An explicitly passed dict is still filled in place.
        if text_list is None:
            text_list = []
        if alignments is None:
            alignments = []
        if var_to_sent is None:
            var_to_sent = {}

        self.text_list = text_list
        self.amr = self.text_list
        # mapping from 'variables' to indices in self.amr
        self.var_to_index = {}
        if not amr_with_attributes:
            # add attributes
            self.add_attributes()
        # add other attributes like 'variable_start_index'
        self.add_variable_info()

        # contains the edge label for every 'parent child' variable pair
        self.edges = {}
        self.connections = self.get_edge_info()
        self.get_var_to_index_mapping()
        # Contains all the 'variables' in the AMR
        self.nodes = self.get_node_info()
        self.common_text = self.get_common_text_var_mapping()

        # get 'var_to_sent'
        if var_to_sent == {}:
            for key in self.var_to_index.keys():
                var_to_sent[key] = [sent_index]
        self.var_to_sent = var_to_sent

        self.alignments = None
        self.get_alignments(alignments)

        # Not updated while merging any 2 nodes
        self.get_sentence_boundaries_amr()
        self.get_text_index_to_var()

        self.directed_graph = Graph(connections=self.connections,
                                    nodes=self.nodes,
                                    edge_lables=self.edges,
                                    var_to_sent=self.var_to_sent,
                                    common_text=self.common_text,
                                    text_index_to_var=self.text_index_to_var,
                                    root=self.amr[0]['variable'])
        self.topological_order = self.directed_graph.topological_order

        # self.text is a list of sentences in case of a document AMR
        self.text = text
        self.split_text = (' '.join(self.text)).split()

        # get depth_dict
        self.depth_dict = {}
        self.get_depth_dict()

    # Merging - Core Functions

    def merge_named_entities_graph(self,):
        """Try to merge one pair of ':name' nodes in the directed graph.

        Designed specifically to run initially; may not work if run after
        some other mergers.

        Returns 1 as soon as ``merge_nodes`` reports a successful merge (2),
        otherwise 0 after all variables were examined.
        """
        graph = self.directed_graph
        existing_names = []
        for var in list(graph._graph.keys()):
            # the parent is the predecessor sitting exactly one level above
            parent_var = ''
            for node in graph.reverse_graph[var]:
                if graph.depth_dict[node] + 1 == graph.depth_dict[var]:
                    # bare lookup kept from the original: it raises KeyError
                    # when no label is stored for this pair
                    graph.edge_lables[node + ' ' + var]
                    parent_var = node
                    break
            if parent_var == '' or ':name' not in graph.edge_lables[parent_var + ' ' + var]:
                continue

            for existing_var in existing_names:
                for node in graph.reverse_graph[existing_var]:
                    if graph.depth_dict[node] + 1 == graph.depth_dict[existing_var]:
                        parent_existing_var = node
                op_list_second_node = graph.get_op_list(var=parent_existing_var)
                op_list_first_node = graph.get_op_list(var=parent_var)
                # don't merge if one isn't a sublist of other,
                # except when one is in the form of initials
                can_merge = (
                    self.check_mutual_sublist(first_list=op_list_first_node,
                                              second_list=op_list_second_node)
                    or self.check_initials(first_list=op_list_first_node,
                                           second_list=op_list_second_node))
                if graph.common_text[existing_var].strip() == graph.common_text[var].strip():
                    can_merge = True
                if can_merge and (graph.common_text[parent_existing_var]
                                  == graph.common_text[parent_var]):
                    # If successful merger, restart merging
                    successfull_merge = self.merge_nodes(first_var=existing_var,
                                                         second_var=var,
                                                         debug=False)
                    if successfull_merge == 2:
                        return 1
            existing_names.append(var)
        return 0

    def merge_date_entites(self,):
        """Merge the first pair of 'date-entity' nodes with equal common text.

        Returns 1 after merging one pair (and reconstructing the AMR), or 0
        when no pair was found.
        """
        existing_dates = []
        for index_node, node in enumerate(self.amr):
            if 'date-entity ' not in node['text']:
                continue
            for index_existing_node in existing_dates:
                if self.amr[index_existing_node]['common_text'].strip() == node['common_text'].strip():
                    self.merge_nodes(first_node_index=index_existing_node,
                                     second_node_index=index_node)
                    self.reconstruct_amr()
                    return 1
            existing_dates.append(index_node)
        return 0

    def merge_nodes(self, first_alignment=None, second_alignment=None,
                    first_node_index=None, second_node_index=None, debug=False,
                    first_var='', second_var=''):
        """Merge two nodes in the directed graph.

        The nodes are given either as variables (``first_var`` /
        ``second_var``) or as indices into ``self.amr``. The alignment
        parameters are accepted but not used.

        Returns the value of ``directed_graph.merge_nodes_in_graph`` unless it
        is -1, in which case 2 is returned. Per the original notes:
            0 - Didn't merge
            1 - No merger needed
            2 - Successful merge
        """
        if first_var == '':
            first_var = self.amr[first_node_index]['variable']
        if second_var == '':
            second_var = self.amr[second_node_index]['variable']

        returned_value = self.directed_graph.merge_nodes_in_graph(
            first_var=first_var, second_var=second_var)
        if debug:
            print(returned_value)
        if returned_value != -1:
            return returned_value
        return 2

    def reconstruct_amr(self):
        """Rebuild all text-derived state from the current directed graph.

        The text is regenerated with ``directed_graph.generate_text_amr``,
        the node attributes, edges and mappings are recomputed, alignments
        are rebuilt from the graph's ``text_index_to_var`` and finally a new
        ``Graph`` is created.
        """
        text_list = [line + '\n' for line in self.directed_graph.generate_text_amr()]
        text_index_to_var = self.directed_graph.text_index_to_var
        var_to_sent = self.directed_graph.var_to_sent

        # Reconstruct the AMR after merging two nodes: drop the stale state
        del self.text_list
        del self.amr
        del self.var_to_index
        del self.nodes
        del self.edges
        del self.directed_graph
        del self.topological_order
        del self.depth_dict

        self.text_list = text_list
        self.amr = self.text_list
        # mapping from 'variables' to indices in self.amr
        self.var_to_index = {}
        # add attributes
        self.add_attributes()
        # add other attributes like 'variable_start_index'
        self.add_variable_info()

        self.edges = {}
        self.connections = self.get_edge_info()
        self.get_var_to_index_mapping()
        # Contains all the 'variables' in the list
        self.nodes = self.get_node_info()

        # keep the sentence information only for variables that still exist
        del self.var_to_sent
        self.var_to_sent = {}
        for var in var_to_sent.keys():
            if var in self.nodes:
                self.var_to_sent[var] = var_to_sent[var]

        self.common_text = self.get_common_text_var_mapping()

        del self.alignments
        self.alignments = {}
        for text_index in text_index_to_var.keys():
            # alignment in case of KeyError is mostly useless (but not always)
            self.alignments[text_index] = []
            for var in text_index_to_var[text_index]:
                try:
                    node_index = self.var_to_index[var][0]
                except KeyError:
                    break
                var_path = self.node_index_to_alignment(node_index)
                self.alignments[text_index].append(var_path)

        # round-trip through the textual alignment format
        alignments = []
        for key in self.alignments.keys():
            for alignment in self.alignments[key]:
                alignments.append(key + '-' + '.'.join(alignment))
        self.alignments = None
        self.get_alignments(alignments)
        self.get_text_index_to_var()

        var_set = []
        for key in self.text_index_to_var.keys():
            var_set.extend(self.text_index_to_var[key])
        for var in set(var_set):
            if var not in self.nodes:
                print('some bug')
                # deliberate hard stop (ZeroDivisionError), kept so the
                # failure mode seen by callers is unchanged
                0 / 0

        self.directed_graph = Graph(connections=self.connections,
                                    nodes=self.nodes,
                                    edge_lables=self.edges,
                                    var_to_sent=self.var_to_sent,
                                    common_text=self.common_text,
                                    text_index_to_var=self.text_index_to_var,
                                    root=self.amr[0]['variable'])
        self.topological_order = self.directed_graph.topological_order
        self.get_depth_dict()

    def _abort_merge(self):
        """Print the AMR and the failure message, then exit the process."""
        self.print_amr(print_indices=False)
        print("Merging Failed terminating ...")
        sys.exit()

    def post_merging_sanity_tests(self,):
        """Exit the process when the brackets of ``self.text_list`` are unbalanced.

        The running count of ')' may never exceed that of '(', the counts may
        only become equal on the last line, and they must be equal at the end.
        """
        num_opening_brackets = 0
        num_closing_brackets = 0
        last_index = len(self.text_list) - 1
        for index, line in enumerate(self.text_list):
            num_opening_brackets += line.count('(')
            num_closing_brackets += line.count(')')
            if num_closing_brackets > num_opening_brackets:
                self._abort_merge()
            if num_opening_brackets == num_closing_brackets and index != last_index:
                self._abort_merge()
        if num_opening_brackets != num_closing_brackets:
            self._abort_merge()
        return

    # Merging - Helper functions

    def get_op_list(self, index=-1):
        """Return the lower-cased ':opN' values of the ':name' child of a node.

        Example - Input  - :name (var2 / name :op1 "ABS-CBN" :op2 "News")))
                  Output - ['"abs-cbn"', '"news"']
        An empty list is returned when the node has no ':name' child.
        """
        text = ''
        current_var = self.amr[index]['variable']
        for child_index in self.amr[index]['children_list']:
            child_var = self.amr[child_index]['variable']
            if self.edges[current_var + ' ' + child_var].startswith(':name'):
                text = self.amr[child_index]['text']
        if text == '':
            return []

        tokens = text.strip(')').split('/')[1].split()
        op_list = []
        for index_word, word in enumerate(tokens):
            if word.startswith(':op'):
                op_list.append(tokens[index_word + 1].lower())
        return [word for word in op_list if word != '']

    def get_edges_children(self, node_index):
        """Return the edge labels leading to the children of ``node_index``."""
        parent_var = self.amr[node_index]['variable']
        return [self.edges[parent_var + ' ' + self.amr[child_index]['variable']]
                for child_index in self.amr[node_index]['children_list']]

    def check_initials(self, first_list=None, second_list=None, debug=False):
        """Return True if and only if one list is the initials of the other.

        One of the lists must hold exactly one word, and that word must equal
        the concatenated first characters of the words of the other list.
        Surrounding double quotes are ignored.
        """
        first_list = [] if first_list is None else first_list
        second_list = [] if second_list is None else second_list
        if not (len(first_list) == 1 or len(second_list) == 1):
            return False
        first_list = [x.strip('"') for x in first_list]
        second_list = [x.strip('"') for x in second_list]
        if debug:
            print('%s %s' % (first_list, second_list))
        # x[:1] instead of x[0]: an empty word contributes no initial
        # rather than raising IndexError
        if len(first_list) == 1:
            if first_list[0] == ''.join([x[:1] for x in second_list]):
                return True
        if len(second_list) == 1:
            if second_list[0] == ''.join([x[:1] for x in first_list]):
                return True
        return False

    def check_mutual_sublist(self, first_list=None, second_list=None):
        """Return True when every word of one list occurs in the other list."""
        first_list = [] if first_list is None else first_list
        second_list = [] if second_list is None else second_list
        if all(word in second_list for word in first_list):
            return True
        return all(word in first_list for word in second_list)

    def replace_variable_in_one_text_line(self, node_index, new_name=''):
        """Return the text of a node with its variable replaced by ``new_name``.

        Removes the existing variable and adds the new one; ``self.amr`` is
        not modified. Everything after the variable is dropped and closing
        brackets are appended, assuming the children will be removed.
        """
        node = self.amr[node_index]
        text = node['text']
        previous_name = node['variable']
        variable_start_index = node['variable_start_index']
        variable_end_index = node['variable_end_index']
        variable_end_index += len(new_name) - len(previous_name)
        text = text[:variable_start_index] + new_name + text[variable_end_index + 1:]

        # Prepare text
        if '(' in text:
            text = text[:variable_start_index - 1] + new_name
        else:
            text = text[:variable_start_index] + new_name

        # Add closing brackets, assuming children will be removed
        num_closing_brackets_to_add = node['depth']
        next_index = node_index + self.get_size_linear_subtree(node_index) + 1
        if next_index < len(self.amr):
            num_closing_brackets_to_add -= self.amr[next_index]['depth']
        text = text.strip(')') + ')' * num_closing_brackets_to_add

        # Result unused; the call is kept because parsing may raise
        self.get_var_info_in_one_text_line(text)
        return text

    # Translation functions - provides translations between -
    # (word, alignment); (alignment, node_index);
    # (node_index, alignment); (node_index, sent_index)

    def word_to_alignment(self, word='', sentence='', location_of_word=0):
        """Return the alignment of the word at ``location_of_word``, or None.

        With a single alignment that alignment is returned. Otherwise the
        shortest alignment that does not end in 'r' (an edge alignment) is
        returned, the first one winning ties.

        Assumes the tokenization of words in gold-standard and coreference
        resolver is the same.
        """
        key = str(location_of_word)
        if key not in self.alignments:
            return None
        candidates = self.alignments[key]
        if len(candidates) == 1:
            return candidates[0]
        # The original compared the last element with the list ['r'], which
        # is never equal to a string, so nothing was ever filtered out.
        non_edge_alignments = [alignment for alignment in candidates
                               if alignment[-1] != 'r']
        if not non_edge_alignments:
            return None
        return min(non_edge_alignments, key=len)

    def alignment_to_node_index(self, alignment):
        """Follow an alignment path from the root and return the node index.

        Every path element after the first is a 1-based branch number. The
        ':' count of the current node's text is subtracted from it first
        (one less for non-root nodes, whose text starts with a ':'). The walk
        stops early when the branch does not exist.
        """
        index = 0
        for branch_to_take in alignment[1:]:
            branch_to_take = int(branch_to_take) - 1
            colon_count = self.amr[index]['text'].count(':')
            if index != 0:
                branch_to_take -= colon_count - 1
            else:
                # because text at first point doesn't start with a ':'
                branch_to_take -= colon_count
            children = self.amr[index]['children_list']
            if branch_to_take < 0 or branch_to_take >= len(children):
                break
            index = children[branch_to_take]
        return index

    def node_index_to_alignment(self, node_index):
        """Given the node_index return the alignment path (list of strings)."""
        path = []
        new_parent_index = node_index
        while new_parent_index != 0:
            node = self.amr[new_parent_index]
            if path:
                # shift the branch below this node by its ':' count minus one
                path[0] = str(int(path[0]) + node['text'].count(':') - 1)
            path.insert(0, str(node['child_number'] + 1))
            new_parent_index = node['parent_index']
        return ['1'] + path

    def node_index_to_sent_index(self, index_node):
        """Return the sentence index containing ``index_node``, or -1."""
        for index_sent, sent_range in enumerate(self.sentence_boundries):
            if sent_range[0] <= index_node <= sent_range[1]:
                return index_sent
        return -1

    def amr_to_text_based_on_alignments(self, var_list=None):
        """Return the distinct words aligned to the variables in ``var_list``.

        The words are joined with spaces; their order is that of a ``set``
        and therefore not defined.
        """
        var_list = [] if var_list is None else var_list
        selected_keys_list = []
        for key in self.alignments.keys():
            for alignment in self.alignments[key]:
                index = self.alignment_to_node_index(alignment)
                if self.amr[index]['variable'] in var_list:
                    selected_keys_list.append(int(key))
        word_list = [self.split_text[key] for key in set(selected_keys_list)]
        return ' '.join(list(set(word_list)))

    # Convert AMR-Graph -> AMR-text

    def get_AMR_from_directed_graph(self, topological_order_sub_graph=None,
                                    sub_graph=None):
        """Convert a (sub) graph to a new ``AMR`` built from its text form.

        ``topological_order_sub_graph`` is accepted but not used.
        """
        # get list of variables from the directed graph
        list_of_variables, depth_list = sub_graph.get_var_list_from_directed_graph()
        text_list_sub_graph = sub_graph.get_text_list(list_of_variables, depth_list)
        return AMR(text_list=text_list_sub_graph, text=self.text,
                   amr_with_attributes=False)

    # Helper functions

    def print_amr(self, file='', print_indices=True, write_in_file=False,
                  one_line_output=False, return_str=False, to_print=True):
        """Write, print and/or return the AMR text.

        Parameters:
            file: object with a ``write`` method, used when ``write_in_file``.
            print_indices: prefix every line with the node index.
            write_in_file: write the AMR (plus a final newline) to ``file``.
            one_line_output: emit the node texts on one line.
            return_str: return the rendered text; otherwise None is returned.
            to_print: print the AMR to standard output.
        """
        if write_in_file:
            for index_node, node in enumerate(self.amr):
                if one_line_output:
                    file.write(node['text'] + ' ')
                else:
                    if print_indices:
                        file.write(str(index_node) + ' ')
                    file.write(node['depth'] * ' ' + node['text'] + '\n')
            file.write('\n')

        if to_print:
            # The original used Python 2 'print x,' statements, which put a
            # single space between consecutive items; that separator is
            # written explicitly here so the code runs on Python 2 and 3.
            for index_node, node in enumerate(self.amr):
                if one_line_output:
                    sys.stdout.write(' ' + node['text'] + ' ')
                else:
                    line = node['depth'] * ' ' + node['text']
                    if print_indices:
                        line = str(index_node) + ' ' + ' ' + line
                    print(line)

        if return_str:
            printed = ''
            for index_node, node in enumerate(self.amr):
                if one_line_output:
                    printed += ' ' + node['text']
                else:
                    if print_indices:
                        printed += str(index_node) + ' '
                    printed += node['depth'] * ' ' + node['text'] + '\n'
            return printed

    def get_nodes(self,):
        """Return the concept part of every non-empty 'common_text'.

        Everything from the first ':' on is cut off and the leading '/' is
        removed. A text not starting with '/ ' triggers a deliberate
        ZeroDivisionError.
        """
        node_list = [node['common_text'] for node in self.amr]
        node_list = [x for x in node_list if x != '']
        node_list = [x if ':' not in x else x[:x.index(':') - 1] for x in node_list]
        for node in node_list:
            if not node.startswith('/ '):
                # deliberate hard stop, kept from the original
                0 / 0
        return [node[1:] for node in node_list]

    def get_edge_tuples(self,):
        """Return 'parent_label_child' strings for every labelled graph edge."""
        edge_tuple_list = []
        for parent_child_pair in self.directed_graph.edge_lables:
            parent, child = parent_child_pair.split(' ')
            lable = self.directed_graph.edge_lables[parent_child_pair][0].strip()
            parent_common_text = self.amr[self.var_to_index[parent][0]]['common_text']
            child_common_text = self.amr[self.var_to_index[child][0]]['common_text']
            if ':' in parent_common_text:
                parent_common_text = parent_common_text[:parent_common_text.index(':') - 1]
            if ':' in child_common_text:
                child_common_text = child_common_text[:child_common_text.index(':') - 1]
            parent_common_text = parent_common_text[1:].strip()
            child_common_text = child_common_text[1:].strip()
            edge_tuple_list.append(parent_common_text + '_' + lable + '_' + child_common_text)
        return edge_tuple_list

    def get_topological_order_sub_graph(self, nodes):
        """Return ``directed_graph.get_topological_order_sub_graph()``.

        NOTE(review): ``nodes`` is not forwarded to the graph - confirm
        whether that is intended.
        """
        return self.directed_graph.get_topological_order_sub_graph()

    def get_size_linear_subtree(self, node_index, return_vars=False):
        """Return the number of nodes below ``node_index`` in the text order.

        The subtree consists of the following nodes as long as their depth is
        greater than the depth of ``node_index``. With ``return_vars`` the
        variables of the node and its subtree are returned as second value.
        """
        initial_index = node_index
        initial_depth = self.amr[node_index]['depth']
        var_list_linear_subtree = []
        while node_index < len(self.amr):
            if self.amr[node_index]['depth'] <= initial_depth and node_index != initial_index:
                break
            var_list_linear_subtree.append(
                self.get_var_info_in_one_text_line(self.amr[node_index]['text'])[0])
            node_index += 1
        size = (node_index - 1) - initial_index
        if return_vars:
            return size, var_list_linear_subtree
        return size

    def break_path_by_sentences(self, path):
        """Split a path of connected variables by sentence.

        path   - a list of connected vars
        return - a dict (sent -> list of vars); ``{}`` for an empty path
        """
        if not path:
            # the original raised IndexError on an empty path
            return {}

        var_sent_dict = {}
        possible_current_sents = []
        current_var_set = []
        for var in path:
            current_var_sents = self.var_to_sent[var]
            if possible_current_sents == []:
                possible_current_sents = current_var_sents
                current_var_set = [var]
                continue
            common_sents = list(set(current_var_sents).intersection(possible_current_sents))
            if common_sents:
                # current var can be in one of the possible current sents
                possible_current_sents = common_sents
                current_var_set.append(var)
            else:
                # store the finished run and start a new one
                var_sent_dict[possible_current_sents[0]] = list(current_var_set)
                possible_current_sents = current_var_sents
                current_var_set = [var]
        var_sent_dict[possible_current_sents[0]] = list(current_var_set)

        # second iteration to find sentences for vars occurring in multiple sents
        possible_current_sents = list(var_sent_dict.keys())
        for var in path:
            current_var_sents = self.var_to_sent[var]
            for sent_index in set(current_var_sents).intersection(possible_current_sents):
                var_sent_dict[sent_index] = list(set(var_sent_dict[sent_index] + [var]))
        return var_sent_dict

    def get_concept_relation_list(self, story_index=0, debug=False):
        """(Re)build ``self.concept_relation_list`` from the current state."""
        try:
            del self.concept_relation_list
        except AttributeError:
            # nothing built yet
            pass
        self.concept_relation_list = concept_relation_list(
            index_to_var=self.text_index_to_var,
            story_index=story_index,
            var_list=list(self.var_to_index.keys()),
            aligned_vars=self.aligned_vars,
            graph=self.directed_graph,
            text=self.text)
        if debug:
            self.concept_relation_list.print_tuples()

    def get_sent_amr(self, sent_index=0):
        """Return the variables that ``var_to_sent`` places in ``sent_index``."""
        var_list = [key for key in self.var_to_sent
                    if sent_index in self.var_to_sent[key]]
        return list(set(var_list))

    # AMR-class construction helper functions

    def get_common_text_var_mapping(self,):
        """Return variable -> 'common_text' of the variable's first index."""
        common_text = {}
        for var in self.nodes:
            index_var = self.var_to_index[var][0]
            common_text[var] = self.amr[index_var]['common_text']
        return common_text

    def get_depth_dict(self,):
        """Set ``self.depth_dict`` to variable -> smallest depth it occurs at."""
        self.depth_dict = {}
        for node in self.amr:
            var = node['variable']
            depth = node['depth']
            if var in self.depth_dict:
                depth = min(depth, self.depth_dict[var])
            self.depth_dict[var] = depth

    def get_edge_info(self,):
        """Fill ``self.edges`` and return the list of graph connections.

        The label of an edge is the first token of the child's text. A
        connection is ``[parent, child]``, reversed when the label contains
        '-' (examples for '-' cases are '-of', '-to').
        """
        connections = []
        for index_node, node in enumerate(self.amr):
            if 'children_list' not in node.keys():
                # generally arise because of issues with depth; the lookup
                # below then raises KeyError after this diagnostic output
                self.print_amr()
                print('%s %s' % (node, index_node))
            parent_var = node['variable']
            for child in node['children_list']:
                child_var = self.amr[child]['variable']
                child_text = self.amr[child]['text']
                lable = child_text[0:child_text.index(' ')]
                self.edges[parent_var + ' ' + child_var] = lable
                if '-' in lable:
                    connections.append([child_var, parent_var])
                else:
                    connections.append([parent_var, child_var])
        return connections

    def get_node_info(self,):
        """Return the variable of every node, in node order."""
        return [node['variable'] for node in self.amr]

    def get_alignments(self, alignments=None):
        """Parse '<text_index>-<a.b.c>' strings into ``self.alignments``.

        The result maps the text index (a string) to a list of alignments;
        an alignment is the list of branches to take at each step in the AMR.
        """
        alignments = [] if alignments is None else alignments
        new_format_alignment = {}
        for alignment in alignments:
            parts = alignment.split('-')
            new_format_alignment.setdefault(parts[0], []).append(parts[1].split('.'))
        self.alignments = new_format_alignment

    def get_text_index_to_var(self,):
        """Create ``self.text_index_to_var`` and ``self.aligned_vars``.

        A trailing 'r' and then a trailing '' are removed from every
        alignment in place before it is resolved to a node.
        """
        self.aligned_vars = []
        self.text_index_to_var = {}
        for key in self.alignments:
            temp_var_set = []
            for alignment in self.alignments[key]:
                if alignment[-1] == 'r':
                    alignment.pop()
                if alignment[-1] == '':
                    alignment.pop()
                index = self.alignment_to_node_index(alignment)
                temp_var_set.append(self.amr[index]['variable'])
            self.aligned_vars.extend(temp_var_set)
            self.text_index_to_var[key] = temp_var_set

    def get_var_to_index_mapping(self,):
        """Fill ``self.var_to_index`` with variable -> list of node indices.

        Indices of nodes carrying 'common_text' are put at the front, so one
        of the first indices has the text information of the variable.
        """
        for index, node in enumerate(self.amr):
            indices = self.var_to_index.setdefault(node['variable'], [])
            if len(node['common_text']) > 0:
                indices.insert(0, index)
            else:
                indices.append(index)

    def get_var_info_in_one_text_line(self, text):
        """Return (variable, variable_start_index, variable_end_index).

        Works for any piece of text in AMR format; anything from a '~' on is
        removed from the variable (the indices are computed before that).
        """
        if '(' not in text:
            # for cases where 'text' is of the form ':ARG0 o'
            variable_start_index = text.strip().rfind(' ') + 1
            variable = text[variable_start_index:].strip(')')
            variable_end_index = variable_start_index + len(variable) - 1
        else:
            bracket_index = text.index('(')
            if ' ' not in text[bracket_index:]:
                # diagnostic output; the index() below then raises ValueError
                self.print_amr()
                print(text)
            variable = text[bracket_index + 1:
                            bracket_index + text[bracket_index:].index(' ')]
            variable_start_index = bracket_index + 1
            variable_end_index = variable_start_index + len(variable) - 1
        if '~' in variable:
            variable = variable[:variable.index('~')]
        return variable, variable_start_index, variable_end_index

    def get_sentence_boundaries_amr(self,):
        """Set ``self.sentence_boundries`` to [first, last] node index pairs.

        A new range starts at every node of depth 1; the last range ends at
        the last node. An empty AMR yields an empty list.
        """
        self.sentence_boundries = []
        if not self.amr:
            # the original raised NameError here
            return
        previous_depth_1_index = 0
        for index_node, node in enumerate(self.amr):
            if node['depth'] == 1:
                if index_node > 1:
                    self.sentence_boundries.append([previous_depth_1_index, index_node - 1])
                previous_depth_1_index = index_node
        self.sentence_boundries.append([previous_depth_1_index, len(self.amr) - 1])

    def add_variable_info(self,):
        """Add 'variable', its start/end index and 'common_text' to each node."""
        for node in self.amr:
            variable, variable_start_index, variable_end_index = \
                self.get_var_info_in_one_text_line(node['text'])
            node['variable'] = variable
            node['variable_start_index'] = variable_start_index
            node['variable_end_index'] = variable_end_index
            node['common_text'] = node['text'][variable_end_index + 1:].strip().strip(')')

    def add_attributes(self,):
        """Turn the text lines of ``self.text_list`` into attributed nodes.

        Takes the AMR as a list of lines and stores in ``self.amr`` a list of
        dictionaries with 'text', 'depth', 'parent_index', 'children_list',
        'no_of_children' and 'child_number'.
        """
        amr = []
        for line in self.text_list:
            # depth is the number of leading spaces divided by 6;
            # '//' keeps it an int on Python 3 as well
            if isinstance(line, str):
                depth = (len(line) - len(line.lstrip(' '))) // 6
                amr.append({'text': line.strip(), 'depth': depth})

        # add no_of_children field
        amr[0]['parent_index'] = -1
        amr[0]['children_list'] = []
        for index, line in enumerate(amr):
            no_of_children = 0
            depth = line['depth']
            temp_depth = depth + 1
            temp_index = index
            # scan the following deeper nodes; those one level down are children
            while temp_depth > depth:
                temp_index = temp_index + 1
                if temp_index >= len(amr):
                    break
                temp_depth = amr[temp_index]['depth']
                if temp_depth == depth + 1:
                    no_of_children = no_of_children + 1
                    # append in parents children list
                    amr[index]['children_list'].append(temp_index)
                    # adding parent_index and empty children list
                    amr[temp_index]['parent_index'] = index
                    amr[temp_index]['children_list'] = []
            amr[index]['no_of_children'] = no_of_children

        # add child_number field
        def add_child_number(amr, line_no):
            child_number = 0
            for index, line in enumerate(amr[line_no + 1:]):
                if line['depth'] <= amr[line_no]['depth']:
                    break
                if line['depth'] == amr[line_no]['depth'] + 1:
                    amr[line_no + index + 1]['child_number'] = child_number
                    child_number = child_number + 1
                    add_child_number(amr, line_no + index + 1)

        amr[0]['child_number'] = 0
        add_child_number(amr, 0)

        self.amr = amr
def reconstruct_amr(self):
    """Regenerate the text AMR from the directed graph and rebuild all state.

    Used after two nodes were merged: node attributes, edges, variable
    mappings, alignments and finally the ``Graph`` itself are recomputed
    from the text produced by ``directed_graph.generate_text_amr``.
    """
    old_graph = self.directed_graph
    regenerated_lines = [line + '\n' for line in old_graph.generate_text_amr()]
    old_text_index_to_var = old_graph.text_index_to_var
    old_var_to_sent = old_graph.var_to_sent
    del old_graph

    # Reconstruct the AMR after merging two nodes: discard the stale state
    for stale_attribute in ('text_list', 'amr', 'var_to_index', 'nodes', 'edges',
                            'directed_graph', 'topological_order', 'depth_dict'):
        delattr(self, stale_attribute)

    self.text_list = regenerated_lines
    self.amr = self.text_list
    # mapping from 'variables' to indices in self.amr
    self.var_to_index = {}
    # add attributes, then the variable related ones
    # such as 'variable_start_index'
    self.add_attributes()
    self.add_variable_info()

    # edge label for every 'parent child' variable pair
    self.edges = {}
    self.connections = self.get_edge_info()
    self.get_var_to_index_mapping()
    # all the 'variables' in the list
    self.nodes = self.get_node_info()

    # only variables that survived keep their sentence information
    del self.var_to_sent
    self.var_to_sent = {}
    for var in old_var_to_sent.keys():
        if var in self.nodes:
            self.var_to_sent[var] = old_var_to_sent[var]

    self.common_text = self.get_common_text_var_mapping()

    previous_keys = set(self.alignments.keys())
    del self.alignments
    self.alignments = {}
    for text_index in old_text_index_to_var.keys():
        # alignment in case of KeyError is mostly useless (but not always)
        paths = self.alignments[text_index] = []
        for var in old_text_index_to_var[text_index]:
            try:
                node_index = self.var_to_index[var][0]
            except KeyError:
                break
            paths.append(self.node_index_to_alignment(node_index))

    # serialise to the '<index>-<a.b.c>' form and parse it again
    alignments = [key + '-' + '.'.join(alignment)
                  for key in self.alignments.keys()
                  for alignment in self.alignments[key]]
    self.alignments = None
    self.get_alignments(alignments)
    self.get_text_index_to_var()

    aligned = []
    for key in self.text_index_to_var.keys():
        aligned.extend(self.text_index_to_var[key])
    for var in list(set(aligned)):
        if var not in self.nodes:
            print('some bug')
            # deliberate hard stop: raises ZeroDivisionError
            0 / 0

    self.directed_graph = Graph(
        connections=self.connections,
        nodes=self.nodes,
        edge_lables=self.edges,
        var_to_sent=self.var_to_sent,
        common_text=self.common_text,
        text_index_to_var=self.text_index_to_var,
        root=self.amr[0]['variable'],
    )
    self.topological_order = self.directed_graph.topological_order
    self.get_depth_dict()
def setUp(self):
    """Give every test its own freshly constructed ``Graph``."""
    fresh_graph = Graph()
    self.test_graph = fresh_graph