Пример #1
0
def get_constituents_with_label(parse_dict, connective):
    """Collect candidate constituent nodes around *connective* and label each
    as "Arg1", "Arg2", or "NULL".

    Candidates are (a) the siblings of every node on the path from the
    connective node up to the root and, for multi-word connectives, (b) the
    children of the words' lowest common ancestor that contain none of the
    connective's own leaves.  A candidate is labeled "Arg1"/"Arg2" when all
    of its leaves fall inside the gold Arg1/Arg2 token span, else "NULL".

    Args:
        parse_dict: CoNLL-style parse dictionary keyed by DocID.
        connective: object exposing DocID, sent_index, token_indices,
            Arg1_token_indices and Arg2_token_indices.

    Returns:
        List of labeled Constituent objects; [] when the sentence has no
        usable parse tree.
    """
    DocID = connective.DocID
    sent_index = connective.sent_index
    parse_tree = parse_dict[DocID]["sentences"][sent_index]["parsetree"].strip()
    syntax_tree = Syntax_tree(parse_tree)
    if syntax_tree.tree is None:  # unparsable sentence -> no candidates
        return []

    conn_indices = connective.token_indices
    constituent_nodes = []
    if len(conn_indices) == 1:  # single-word connective: "and", "or", "so", ...
        conn_node = syntax_tree.get_leaf_node_by_token_index(conn_indices[0]).up
    else:
        # Multi-word connective: anchor at the words' lowest common ancestor.
        conn_node = syntax_tree.get_common_ancestor_by_token_indices(conn_indices)

        conn_leaves = {
            syntax_tree.get_leaf_node_by_token_index(conn_index)
            for conn_index in conn_indices
        }

        # Children of the ancestor that share no leaf with the connective
        # are argument candidates themselves.
        for child in conn_node.get_children():
            if conn_leaves.isdisjoint(child.get_leaves()):
                constituent_nodes.append(child)

    # Walk up to the root, collecting every sibling along the path.
    curr = conn_node
    while not curr.is_root():
        constituent_nodes.extend(syntax_tree.get_siblings(curr))
        curr = curr.up

    Arg1_leaves = {
        syntax_tree.get_leaf_node_by_token_index(index)
        for index in connective.Arg1_token_indices
    }
    Arg2_leaves = {
        syntax_tree.get_leaf_node_by_token_index(index)
        for index in connective.Arg2_token_indices
    }

    # Build a Constituent per node and label it by span containment.
    constituents = []
    for node in constituent_nodes:
        cons = Constituent(syntax_tree, node)
        cons.connective = connective
        leaves = set(node.get_leaves())
        if leaves <= Arg1_leaves:
            cons.label = "Arg1"
        elif leaves <= Arg2_leaves:
            cons.label = "Arg2"
        else:
            cons.label = "NULL"
        constituents.append(cons)

    return constituents
Пример #2
0
def _get_constituents(parse_dict, connective):
    """Collect unlabeled candidate constituent nodes around *connective*.

    Candidates are the siblings of every node on the path from the
    connective node to the root plus, for multi-word connectives, the
    children of the words' common ancestor that contain no connective leaf.

    Args:
        parse_dict: CoNLL-style parse dictionary keyed by DocID.
        connective: object exposing DocID, sent_index and token_indices.

    Returns:
        List of Constituent objects; [] when the sentence has no parse tree.
    """
    DocID = connective.DocID
    sent_index = connective.sent_index
    parse_tree = parse_dict[DocID]["sentences"][sent_index]["parsetree"].strip()
    syntax_tree = Syntax_tree(parse_tree)
    if syntax_tree.tree is None:  # unparsable sentence -> no candidates
        return []

    conn_indices = connective.token_indices
    constituent_nodes = []
    if len(conn_indices) == 1:  # single-word connective, e.g. "and", "or", "so"
        conn_node = syntax_tree.get_leaf_node_by_token_index(conn_indices[0]).up
    else:
        conn_node = syntax_tree.get_common_ancestor_by_token_indices(conn_indices)
        conn_leaves = {
            syntax_tree.get_leaf_node_by_token_index(idx) for idx in conn_indices
        }
        # Ancestor children that contain no connective word are candidates.
        for child in conn_node.get_children():
            if conn_leaves.isdisjoint(child.get_leaves()):
                constituent_nodes.append(child)

    # Siblings of every node on the path from the connective to the root.
    curr = conn_node
    while not curr.is_root():
        constituent_nodes.extend(syntax_tree.get_siblings(curr))
        curr = curr.up

    # Wrap each node in a Constituent tied back to the connective.
    constituents = []
    for node in constituent_nodes:
        cons = Constituent(syntax_tree, node)
        cons.connective = connective
        constituents.append(cons)
    return constituents
Пример #3
0
def get_constituents_with_label2(parse_dict, connective):
    """Collect candidate constituents for *connective* and label each node
    "Arg1", "Arg2", or "NULL" by gold-span containment.

    Candidate extraction mirrors get_constituents_with_label: siblings on
    the connective-to-root path, plus (for multi-word connectives) children
    of the words' common ancestor that hold none of the connective's leaves.

    Args:
        parse_dict: CoNLL-style parse dictionary keyed by DocID.
        connective: object exposing DocID, sent_index, token_indices,
            Arg1_token_indices and Arg2_token_indices.

    Returns:
        List of labeled Constituent objects; [] when the sentence has no
        usable parse tree.
    """
    DocID = connective.DocID
    sent_index = connective.sent_index
    parse_tree = parse_dict[DocID]["sentences"][sent_index]["parsetree"].strip()
    syntax_tree = Syntax_tree(parse_tree)
    if syntax_tree.tree is None:  # unparsable sentence -> no candidates
        return []

    conn_indices = connective.token_indices
    constituent_nodes = []
    if len(conn_indices) == 1:  # single-word connective, e.g. "and", "or", "so"
        conn_node = syntax_tree.get_leaf_node_by_token_index(conn_indices[0]).up
    else:
        conn_node = syntax_tree.get_common_ancestor_by_token_indices(conn_indices)

        conn_leaves = {
            syntax_tree.get_leaf_node_by_token_index(conn_index)
            for conn_index in conn_indices
        }

        # Children of the ancestor disjoint from the connective's leaves
        # are candidates in their own right.
        for child in conn_node.get_children():
            if conn_leaves.isdisjoint(child.get_leaves()):
                constituent_nodes.append(child)

    # Collect siblings of every node on the path up to the root.
    curr = conn_node
    while not curr.is_root():
        constituent_nodes.extend(syntax_tree.get_siblings(curr))
        curr = curr.up

    Arg1_leaves = {
        syntax_tree.get_leaf_node_by_token_index(index)
        for index in connective.Arg1_token_indices
    }
    Arg2_leaves = {
        syntax_tree.get_leaf_node_by_token_index(index)
        for index in connective.Arg2_token_indices
    }

    constituents = []
    for node in constituent_nodes:
        cons = Constituent(syntax_tree, node)
        cons.connective = connective
        leaves = set(node.get_leaves())
        if leaves <= Arg1_leaves:
            cons.label = "Arg1"
        elif leaves <= Arg2_leaves:
            cons.label = "Arg2"
        else:
            cons.label = "NULL"
        constituents.append(cons)

    return constituents
Пример #4
0
def ssArgumentExt(inputFilenamePath):
 # Python 2 code (print statements). For each entry in the module-global
 # observedArray, looks up the predicted connective's document/sentence/token
 # position in the module-global bigDiction, rebuilds that sentence's parse
 # tree from parses.json, and collects the connective's candidate argument
 # constituents (same sibling-walk extraction as the other examples above).
 # NOTE(review): definition may continue past the end of this view; nothing
 # visible is returned on the normal path.
 parse_file = codecs.open(inputFilenamePath+'/parses.json', encoding='utf8');
 en_parse_dict = json.load(parse_file);
 i = 0;  # index into bigDiction, advanced once per prediction
 for prediction in observedArray:
  filename = bigDiction[i][2];
  # NOTE(review): reads bigDiction[i+1][3] while the other fields use index i
  # — confirm the off-by-one is intentional (also +1 on the sentence number).
  sentenceNumber = int(bigDiction[i+1][3]) + 1;
  connWordID = int(bigDiction[i][4]);
  print "ConnWordID: " + str(connWordID);
  parse_tree = en_parse_dict[filename]["sentences"][sentenceNumber]["parsetree"].strip();
  syntax_tree = Syntax_tree(parse_tree)
  # NOTE(review): this return aborts the WHOLE function on the first
  # unparsable sentence instead of skipping it — confirm `continue` was meant.
  if syntax_tree.tree == None:
   return []
  #Get Connective Indices
  conn_indices = [connWordID];  # single token id, so the else branch below is dead
  constituent_nodes = [];
  if len(conn_indices) == 1:# like and or so...
        conn_node = syntax_tree.get_leaf_node_by_token_index(conn_indices[0]).up
  else:
        # Unreachable here (conn_indices always has length 1), kept for parity
        # with the multi-word-connective handling in the sibling functions.
        conn_node = syntax_tree.get_common_ancestor_by_token_indices(conn_indices)
        conn_leaves = set([syntax_tree.get_leaf_node_by_token_index(conn_index) for conn_index in conn_indices])
        children = conn_node.get_children()
        for child in children:
            leaves = set(child.get_leaves())
            if conn_leaves & leaves == set([]):
                constituent_nodes.append(child)
  
  # Collect siblings of every node on the path from the connective to the root.
  curr = conn_node
  while not curr.is_root():
   constituent_nodes.extend(syntax_tree.get_siblings(curr))
   curr = curr.up

  # obtain the Constituent object according to the node.
  constituents = []
  for node in constituent_nodes:
   cons = Constituent(syntax_tree, node)
   #print "Object Type: " + str(cons.type());
   #print "Object Dir: " + str(cons.dir());
   #print "Object id: " + str(cons.id());
   #print "cons: " + str(cons.connective);
   connective = Connective(filename, sentenceNumber, conn_indices, "text");
   cons.connective = connective
   constituents.append(cons)
  i = i + 1;
  print "Connective ID:" + str(connWordID); 
  print "Size of Observed Array: " + str(len(observedArray));
  print "Size of Constituents Array: " + str(len(constituents));