Exemplo n.º 1
0
def revise_node(content, amr_nodes_content, amr_nodes_acronym):
    '''
	In case of single '()' contains multiple nodes
	e.x. (m / moment :poss p5)

	:param str context:
	:param dict amr_nodes_content: content as key
	:param dict amr_nodes_acronym: acronym as key
	'''
    m = re.search('\w+\s/\s\S+\s+(.+)', content.replace('\n', ''))
    if m and ' / name' not in content and ':polarity -' not in content:
        arg_nodes = []
        acr = re.search('\w+\s/\s\S+', content).group().split(' / ')[0]
        nodes = re.findall('\S+\s\".+\"|\S+\s\S+', m.group(1))
        for i in nodes:
            i = re.search('(:\S+)\s(.+)', i)
            role = i.group(1)
            concept = i.group(2).strip(')')
            if concept in amr_nodes_acronym:
                node = copy.copy(amr_nodes_acronym[concept])
                node.next_nodes = []
            else:  # in case of (d / date-entity :year 2012)
                node = Node(name=concept)
                amr_nodes_acronym[concept] = node
            node.edge_label = role
            arg_nodes.append(node)
        amr_nodes_acronym[acr].next_nodes = arg_nodes
        amr_nodes_content[content].next_nodes = arg_nodes
Exemplo n.º 2
0
def generate_nodes_multiple(content, amr_nodes_content, amr_nodes_acronym):
    '''
	Generate Node object for nested '()'

	:param str context:
	:param dict amr_nodes_content: content as key
	:param dict amr_nodes_acronym: acronym as key
	'''
    try:
        assert content.count('(') > 1 and content.count(')') > 1
        assert content.count('(') == content.count(')')
    except AssertionError:
        raise Exception('Unmatched parenthesis')

    #note that if we want to get complete content, we need to complete subgraphs in amr_nodes_content, however if we really complete it, the following can't be indexed in amr_nodes_content
    _content = content  #because content will gradually move its components, so use _content to back up for amr_nodes_content
    org = content  #original_content   difference between above is It don't remove :name
    arg_nodes = []
    is_named_entity = False

    # Remove existing nodes from the content, and link these nodes to the root
    # of the subtree
    for i in sorted(amr_nodes_content, key=len, reverse=True):
        if i in content:
            e = content.find(i)
            s = content[:e].rfind(':')
            role = re.search(':\S+\s', content[s:e]).group()  # Edge label

            amr_nodes_content[i].edge_label = role.strip()
            if ':name' in role:
                is_named_entity = True
                ne = amr_nodes_content[i]
            else:
                arg_nodes.append(amr_nodes_content[i])
            if ':name' not in role:
                org = org.replace(role + i, '', 1)
            content = content.replace(role + i, '', 1)

    predict_event = re.search('\w+\s/\s\S+', content).group().split(' / ')
    if predict_event:
        acr = predict_event[0]  # Acronym
        ful = predict_event[1]  # Full name
    else:
        acr, ful = '-', '-'

    # In case of :polarity -
    is_polarity = True if re.search(":polarity\s-", content) else False

    nodes = re.findall(':\S+\s\S+', content)
    for i in nodes:
        i = re.search('(:\S+)\s(\S+)', i)
        role = i.group(1)
        concept = i.group(2).strip("()")
        if role == ':wiki' and is_named_entity:
            continue
        if role in [':polarity', ':quant', ':age', ':value']:
            continue
        if concept in amr_nodes_acronym:
            node = copy.copy(amr_nodes_acronym[concept])
            content = content.replace(i.group(0), "")
        # In case of (d / date-entity :year 2012)
        else:
            node = Node(name=concept)
            amr_nodes_acronym[concept] = node
            # if re.search("\s+"+concept+"[^\d]", content, ):
            # 	content.replace(concept, )
        node.edge_label = role
        arg_nodes.append(node)

        # Named entity is a special node, so the subtree of a
        # named entity will be merged. For example,
        #     (p / person :wiki -
        #        :name (n / name
        #                 :op1 "Pascale"))
        # will be merged as one node.
        # According to AMR Specification, "we fill the :instance
        # slot from a special list of standard AMR named entity types".
        # Thus, for named entity node, we will use entity type
        # (p / person in the example above) instead of :instance

    if is_named_entity:
        # Get Wikipedia title:
        if re.match('.+:wiki\s-.*', content):
            wikititle = '-'  # Entity is NIL, Wiki title does not exist
        else:
            m = re.search(':wiki\s\"(.+?)\"', content)
            if m:
                wikititle = urllib.parse.unquote_plus(m.group(1))  # Wiki title
            else:
                wikititle = ''  # There is no Wiki title information

        new_node = Node(name=acr,
                        ful_name=ful,
                        next_nodes=arg_nodes,
                        parents=set(),
                        edge_label=ne.ful_name,
                        is_entity=True,
                        entity_type=ful,
                        entity_name=ne.entity_name,
                        wiki=wikititle,
                        polarity=is_polarity,
                        content=content,
                        original_content=org)
        amr_nodes_content[_content] = new_node
        amr_nodes_acronym[acr] = new_node

    elif len(arg_nodes) > 0:
        new_node = Node(name=acr,
                        ful_name=ful,
                        next_nodes=arg_nodes,
                        parents=set(),
                        polarity=is_polarity,
                        content=content,
                        original_content=_content)
        amr_nodes_content[_content] = new_node
        amr_nodes_acronym[acr] = new_node
    for child in new_node.next_nodes:
        child.parents.add(new_node)
Exemplo n.º 3
0
def generate_node_single(content, amr_nodes_content, amr_nodes_acronym):
    '''
	Generate Node object for single '()'

	:param str context:
	:param dict amr_nodes_content: content as key
	:param dict amr_nodes_acronym: acronym as key
	'''
    is_named_entity = False
    try:
        assert content.count('(') == 1 and content.count(')') == 1
    except AssertionError:
        raise Exception('Unmatched parenthesis')

    predict_event = re.search('(\w+)\s/\s(\S+)', content)
    if predict_event:
        acr = predict_event.group(1)  # Acronym
        ful = predict_event.group(2).strip(')')  # Full name
    else:
        acr, ful = '-', '-'

    # In case of :polarity -
    is_polarity = True if re.search(":polarity\s-", content) else False

    # :ARG ndoes
    arg_nodes = []
    nodes = re.findall(':\S+\s\S+', content)
    for i in nodes:
        i = re.search('(:\S+)\s(\S+)', i)
        role = i.group(1)
        concept = i.group(2).strip(')')
        if role == ':wiki' and is_named_entity:
            continue
        if role == ':polarity':
            continue
        if concept in amr_nodes_acronym:
            node = copy.copy(amr_nodes_acronym[concept])
            node.next_nodes = []
        # In case of (d / date-entity :year 2012)
        else:
            node = Node(name=concept, original_content=concept)
            amr_nodes_acronym[concept] = node
        node.edge_label = role
        arg_nodes.append(node)

    # Node is a named entity
    names = re.findall(':op\d\s\"\S+\"', content)
    if len(names) > 0:
        entity_name = ''
        for i in names:
            entity_name += re.match(':op\d\s\"(\S+)\"', i).group(1) + ' '
        entity_name = urllib.parse.unquote_plus(entity_name.strip())
        new_node = Node(name=acr,
                        ful_name=ful,
                        next_nodes=arg_nodes,
                        parents=set(),
                        entity_name=entity_name,
                        polarity=is_polarity,
                        content=content,
                        original_content=content)
        amr_nodes_content[content] = new_node
        amr_nodes_acronym[acr] = new_node
    else:
        new_node = Node(name=acr,
                        ful_name=ful,
                        next_nodes=arg_nodes,
                        parents=set(),
                        polarity=is_polarity,
                        content=content,
                        original_content=content)
        amr_nodes_content[content] = new_node
        amr_nodes_acronym[acr] = new_node
Exemplo n.º 4
0
def generate_nodes_multiple(content, amr_nodes_content, amr_nodes_acronym):
    '''
    Generate Node object for nested '()'

    :param str context:
    :param dict amr_nodes_content: content as key
    :param dict amr_nodes_acronym: acronym as key
    '''
    try:
        assert content.count('(') > 1 and content.count(')') > 1
        assert content.count('(') == content.count(')')
    except AssertionError:
        raise Exception('Unmatched parenthesis')

    _content = content
    arg_nodes = []
    is_named_entity = False

    # Remove existing nodes from the content, and link these nodes to the root
    # of the subtree
    for i in sorted(amr_nodes_content, key=len, reverse=True):
        if i in content:
            e = content.find(i)
            s = content[:e].rfind(':')
            role = re.search(':\S+\s', content[s:e]).group() # Edge label
            content = content.replace(role+i, '', 1)
            amr_nodes_content[i].edge_label = role.strip()
            if ':name' in role:
                is_named_entity = True
                ne = amr_nodes_content[i]
            else:
                arg_nodes.append(amr_nodes_content[i])

    predict_event = re.search('\w+\s/\s\S+', content).group().split(' / ')
    if predict_event:
        acr = predict_event[0] # Acronym
        ful = predict_event[1] # Full name
    else:
        acr, ful = '-', '-'

    # In case of :polarity -
    is_polarity = True if re.search(":polarity\s-", content) else False

    nodes = re.findall(':\S+\s\S+', content)
    for i in nodes:
        i = re.search('(:\S+)\s(\S+)', i)
        role = i.group(1)
        concept = i.group(2).strip(')')
        if role == ':wiki' and is_named_entity:
            continue
        if role == ':polarity':
            continue
        if concept in amr_nodes_acronym:
            node = copy.copy(amr_nodes_acronym[concept])
            node.next_nodes = []
        # In case of (d / date-entity :year 2012)
        else:
            node = Node(name=concept)
            amr_nodes_acronym[concept] = node
        node.edge_label = role
        arg_nodes.append(node)

        # Named entity is a special node, so the subtree of a
        # named entity will be merged. For example,
        #     (p / person :wiki -
        #        :name (n / name
        #                 :op1 "Pascale"))
        # will be merged as one node.
        # According to AMR Specification, "we fill the :instance
        # slot from a special list of standard AMR named entity types".
        # Thus, for named entity node, we will use entity type
        # (p / person in the example above) instead of :instance

    if is_named_entity:
        # Get Wikipedia title:
        if re.match('.+:wiki\s-.*', content):
            wikititle = '-' # Entity is NIL, Wiki title does not exist
        else:
            m = re.search(':wiki\s\"(.+?)\"', content)
            if m:
                wikititle = urllib.parse.unquote_plus(m.group(1)) # Wiki title
            else:
                wikititle = '' # There is no Wiki title information

        new_node = Node(name=acr, ful_name=ful, next_nodes=arg_nodes,
                        edge_label=ne.ful_name, is_entity=True,
                        entity_type=ful, entity_name=ne.entity_name,
                        wiki=wikititle, polarity=is_polarity, content=content)
        amr_nodes_content[_content] = new_node
        amr_nodes_acronym[acr] = new_node

    elif len(arg_nodes) > 0:
        new_node = Node(name=acr, ful_name=ful, next_nodes=arg_nodes,
                        polarity=is_polarity, content=content)
        amr_nodes_content[_content] = new_node
        amr_nodes_acronym[acr] = new_node