def revise_node(content, amr_nodes_content, amr_nodes_acronym):
    '''
    Rebuild the children of a single-'()' node that carries several
    role/value pairs, e.g. (m / moment :poss p5)

    The function does nothing when ``content`` has no text after the
    "acronym / concept" head, or when it contains ' / name' or
    ':polarity -'. Otherwise the ``next_nodes`` of the node registered under
    the head acronym and of the node registered under ``content`` are both
    replaced by the same freshly built list.

    :param str content: text of the node; also its key in amr_nodes_content
    :param dict amr_nodes_content: content as key
    :param dict amr_nodes_acronym: acronym as key
    :raises KeyError: if the head acronym or ``content`` is not registered
        in the corresponding dict
    '''
    m = re.search(r'\w+\s/\s\S+\s+(.+)', content.replace('\n', ''))
    if not m or ' / name' in content or ':polarity -' in content:
        return

    acr = re.search(r'\w+\s/\s\S+', content).group().split(' / ')[0]

    arg_nodes = []
    # A pair is either `token "quoted text"` or two whitespace-separated tokens.
    for pair in re.findall(r'\S+\s".+"|\S+\s\S+', m.group(1)):
        parsed = re.search(r'(:\S+)\s(.+)', pair)
        if parsed is None:
            # The pair has no ':role' part, so it cannot be turned into an
            # edge; skip it instead of failing on a None match.
            continue
        role = parsed.group(1)
        concept = parsed.group(2).strip(')')
        if concept in amr_nodes_acronym:
            # Re-use a known node, but as a shallow copy without children so
            # the registered node itself is left untouched.
            node = copy.copy(amr_nodes_acronym[concept])
            node.next_nodes = []
        else:
            # In case of (d / date-entity :year 2012)
            node = Node(name=concept)
            amr_nodes_acronym[concept] = node
        node.edge_label = role
        arg_nodes.append(node)

    amr_nodes_acronym[acr].next_nodes = arg_nodes
    amr_nodes_content[content].next_nodes = arg_nodes
def generate_nodes_multiple(content, amr_nodes_content, amr_nodes_acronym):
    '''
    Generate Node object for nested '()'

    Sub-nodes already registered in ``amr_nodes_content`` are cut out of
    ``content`` and attached to the new node as children (or, for a :name
    sub-node, merged into it as a named entity). The new node is registered
    under the untouched input text in ``amr_nodes_content`` and under its
    acronym in ``amr_nodes_acronym``, and is added to the ``parents`` of each
    of its children.

    NOTE(review): this file contains a second definition of
    generate_nodes_multiple further down; if both live at the same scope the
    later one replaces this one at import time -- confirm which is intended.

    :param str content: text of the nested node
    :param dict amr_nodes_content: content as key
    :param dict amr_nodes_acronym: acronym as key
    :raises Exception: 'Unmatched parenthesis' unless ``content`` holds more
        than one '(' and exactly as many ')'
    '''
    # Explicit check rather than `assert`, which is removed under `python -O`.
    opening = content.count('(')
    closing = content.count(')')
    if opening <= 1 or closing <= 1 or opening != closing:
        raise Exception('Unmatched parenthesis')

    # `content` is progressively stripped of its sub-nodes below, so keep the
    # untouched text as the key for amr_nodes_content.
    _content = content
    # Like `content`, but the :name sub-node is NOT removed from it.
    org = content
    arg_nodes = []
    is_named_entity = False
    ne = None

    # Remove existing nodes from the content, and link these nodes to the root
    # of the subtree. Longest keys first, so an enclosing sub-node is matched
    # before the smaller ones it contains.
    for i in sorted(amr_nodes_content, key=len, reverse=True):
        if i not in content:
            continue
        e = content.find(i)
        s = content[:e].rfind(':')
        role = re.search(r':\S+\s', content[s:e]).group()  # Edge label
        sub_node = amr_nodes_content[i]
        sub_node.edge_label = role.strip()
        if ':name' in role:
            is_named_entity = True
            ne = sub_node
        else:
            arg_nodes.append(sub_node)
            org = org.replace(role + i, '', 1)
        content = content.replace(role + i, '', 1)

    # Look the head up before dereferencing it, so the '-' fallback is
    # actually reachable when no "acronym / concept" head is left.
    head = re.search(r'(\w+)\s/\s(\S+)', content)
    if head:
        acr = head.group(1)  # Acronym
        ful = head.group(2)  # Full name
    else:
        acr, ful = '-', '-'

    # In case of :polarity -
    is_polarity = bool(re.search(r":polarity\s-", content))

    # finditer scans the string as it is at this point; rebinding `content`
    # inside the loop does not affect the remaining matches.
    for pair in re.finditer(r'(:\S+)\s(\S+)', content):
        role = pair.group(1)
        concept = pair.group(2).strip("()")
        if role == ':wiki' and is_named_entity:
            continue
        if role in [':polarity', ':quant', ':age', ':value']:
            continue
        if concept in amr_nodes_acronym:
            node = copy.copy(amr_nodes_acronym[concept])
            content = content.replace(pair.group(0), "")
        else:
            # In case of (d / date-entity :year 2012)
            node = Node(name=concept)
            amr_nodes_acronym[concept] = node
        node.edge_label = role
        arg_nodes.append(node)

    # Named entity is a special node, so the subtree of a
    # named entity will be merged. For example,
    #     (p / person :wiki -
    #        :name (n / name
    #                 :op1 "Pascale"))
    # will be merged as one node.
    # According to AMR Specification, "we fill the :instance
    # slot from a special list of standard AMR named entity types".
    # Thus, for named entity node, we will use entity type
    # (p / person in the example above) instead of :instance
    new_node = None
    if is_named_entity:
        # Get Wikipedia title:
        if re.match(r'.+:wiki\s-.*', content):
            wikititle = '-'  # Entity is NIL, Wiki title does not exist
        else:
            m = re.search(r':wiki\s"(.+?)"', content)
            if m:
                wikititle = urllib.parse.unquote_plus(m.group(1))  # Wiki title
            else:
                wikititle = ''  # There is no Wiki title information
        new_node = Node(name=acr, ful_name=ful, next_nodes=arg_nodes,
                        parents=set(), edge_label=ne.ful_name,
                        is_entity=True, entity_type=ful,
                        entity_name=ne.entity_name, wiki=wikititle,
                        polarity=is_polarity, content=content,
                        original_content=org)
    elif arg_nodes:
        new_node = Node(name=acr, ful_name=ful, next_nodes=arg_nodes,
                        parents=set(), polarity=is_polarity, content=content,
                        original_content=_content)

    if new_node is None:
        # No sub-node and no role/value pair: nothing to register.
        return

    amr_nodes_content[_content] = new_node
    amr_nodes_acronym[acr] = new_node
    # Register the new node as a parent of each of its children.
    # NOTE(review): applied to both the named-entity and the plain branch --
    # confirm this matches the intended nesting of the loop.
    for child in new_node.next_nodes:
        child.parents.add(new_node)
def generate_node_single(content, amr_nodes_content, amr_nodes_acronym):
    '''
    Generate Node object for single '()'

    Every ":role value" pair except :polarity becomes a child node. The new
    node is registered under ``content`` in ``amr_nodes_content`` and under
    its acronym in ``amr_nodes_acronym``. When ``content`` holds
    :opN "..." pairs, their quoted values are joined with spaces,
    URL-unquoted and passed to the node as ``entity_name``.

    :param str content: text of the node, containing exactly one '(' and ')'
    :param dict amr_nodes_content: content as key
    :param dict amr_nodes_acronym: acronym as key
    :raises Exception: 'Unmatched parenthesis' unless ``content`` holds
        exactly one '(' and one ')'
    '''
    # Explicit check rather than `assert`, which is removed under `python -O`.
    if content.count('(') != 1 or content.count(')') != 1:
        raise Exception('Unmatched parenthesis')

    predict_event = re.search(r'(\w+)\s/\s(\S+)', content)
    if predict_event:
        acr = predict_event.group(1)  # Acronym
        ful = predict_event.group(2).strip(')')  # Full name
    else:
        acr, ful = '-', '-'

    # In case of :polarity -
    is_polarity = bool(re.search(r":polarity\s-", content))

    # :ARG nodes
    # (The original also tested a named-entity flag before skipping :wiki,
    # but that flag was never set in this function, so :wiki pairs are
    # handled like any other role.)
    arg_nodes = []
    for pair in re.finditer(r'(:\S+)\s(\S+)', content):
        role = pair.group(1)
        concept = pair.group(2).strip(')')
        if role == ':polarity':
            continue
        if concept in amr_nodes_acronym:
            # Shallow copy without children, so the registered node itself
            # is left untouched.
            node = copy.copy(amr_nodes_acronym[concept])
            node.next_nodes = []
        else:
            # In case of (d / date-entity :year 2012)
            node = Node(name=concept, original_content=concept)
            amr_nodes_acronym[concept] = node
        node.edge_label = role
        arg_nodes.append(node)

    node_kwargs = dict(name=acr, ful_name=ful, next_nodes=arg_nodes,
                       parents=set(), polarity=is_polarity,
                       content=content, original_content=content)

    # Node is a named entity: pass the name only when :opN "..." pairs exist,
    # so Node's own default applies otherwise.
    names = re.findall(r':op\d\s"(\S+)"', content)
    if names:
        node_kwargs['entity_name'] = urllib.parse.unquote_plus(' '.join(names))

    new_node = Node(**node_kwargs)
    amr_nodes_content[content] = new_node
    amr_nodes_acronym[acr] = new_node
def generate_nodes_multiple(content, amr_nodes_content, amr_nodes_acronym):
    '''
    Generate Node object for nested '()'

    Sub-nodes already registered in ``amr_nodes_content`` are cut out of
    ``content`` and attached to the new node as children (or, for a :name
    sub-node, merged into it as a named entity). The new node is registered
    under the untouched input text in ``amr_nodes_content`` and under its
    acronym in ``amr_nodes_acronym``. Nothing is registered when there is
    neither a :name sub-node nor any child.

    NOTE(review): an earlier definition of generate_nodes_multiple exists
    above in this file; if both live at the same scope this later one is the
    one in effect after import -- confirm which is intended.

    :param str content: text of the nested node
    :param dict amr_nodes_content: content as key
    :param dict amr_nodes_acronym: acronym as key
    :raises Exception: 'Unmatched parenthesis' unless ``content`` holds more
        than one '(' and exactly as many ')'
    '''
    # Explicit check rather than `assert`, which is removed under `python -O`.
    opening = content.count('(')
    closing = content.count(')')
    if opening <= 1 or closing <= 1 or opening != closing:
        raise Exception('Unmatched parenthesis')

    # `content` is progressively stripped of its sub-nodes below, so keep the
    # untouched text as the key for amr_nodes_content.
    _content = content
    arg_nodes = []
    is_named_entity = False
    ne = None

    # Remove existing nodes from the content, and link these nodes to the root
    # of the subtree. Longest keys first, so an enclosing sub-node is matched
    # before the smaller ones it contains.
    for i in sorted(amr_nodes_content, key=len, reverse=True):
        if i not in content:
            continue
        e = content.find(i)
        s = content[:e].rfind(':')
        role = re.search(r':\S+\s', content[s:e]).group()  # Edge label
        content = content.replace(role + i, '', 1)
        sub_node = amr_nodes_content[i]
        sub_node.edge_label = role.strip()
        if ':name' in role:
            is_named_entity = True
            ne = sub_node
        else:
            arg_nodes.append(sub_node)

    # Look the head up before dereferencing it, so the '-' fallback is
    # actually reachable when no "acronym / concept" head is left.
    head = re.search(r'(\w+)\s/\s(\S+)', content)
    if head:
        acr = head.group(1)  # Acronym
        ful = head.group(2)  # Full name
    else:
        acr, ful = '-', '-'

    # In case of :polarity -
    is_polarity = bool(re.search(r":polarity\s-", content))

    for pair in re.finditer(r'(:\S+)\s(\S+)', content):
        role = pair.group(1)
        concept = pair.group(2).strip(')')
        if role == ':wiki' and is_named_entity:
            continue
        if role == ':polarity':
            continue
        if concept in amr_nodes_acronym:
            # Shallow copy without children, so the registered node itself
            # is left untouched.
            node = copy.copy(amr_nodes_acronym[concept])
            node.next_nodes = []
        else:
            # In case of (d / date-entity :year 2012)
            node = Node(name=concept)
            amr_nodes_acronym[concept] = node
        node.edge_label = role
        arg_nodes.append(node)

    # Named entity is a special node, so the subtree of a
    # named entity will be merged. For example,
    #     (p / person :wiki -
    #        :name (n / name
    #                 :op1 "Pascale"))
    # will be merged as one node.
    # According to AMR Specification, "we fill the :instance
    # slot from a special list of standard AMR named entity types".
    # Thus, for named entity node, we will use entity type
    # (p / person in the example above) instead of :instance
    if is_named_entity:
        # Get Wikipedia title:
        if re.match(r'.+:wiki\s-.*', content):
            wikititle = '-'  # Entity is NIL, Wiki title does not exist
        else:
            m = re.search(r':wiki\s"(.+?)"', content)
            if m:
                wikititle = urllib.parse.unquote_plus(m.group(1))  # Wiki title
            else:
                wikititle = ''  # There is no Wiki title information
        new_node = Node(name=acr, ful_name=ful, next_nodes=arg_nodes,
                        edge_label=ne.ful_name, is_entity=True,
                        entity_type=ful, entity_name=ne.entity_name,
                        wiki=wikititle, polarity=is_polarity, content=content)
        amr_nodes_content[_content] = new_node
        amr_nodes_acronym[acr] = new_node
    elif arg_nodes:
        new_node = Node(name=acr, ful_name=ful, next_nodes=arg_nodes,
                        polarity=is_polarity, content=content)
        amr_nodes_content[_content] = new_node
        amr_nodes_acronym[acr] = new_node