def prep_tree_srm_arg(relation_list, arg_pos, wbm, max_length, all_left_branching=False):
    """Build the padded matrices a tree model needs for one argument.

    With T = max_length, N = len(relation_list) and d = wbm.num_units:

    embedding_series : 2T x N x d rows of wbm.wm looked up by token index
    children         : T x N x 3 rows copied from the ordering matrix
    c_mask           : T x N, 1.0 where a row of ``children`` is filled
    node_mask        : 2T x N, 1.0 on rows num_leaves..num_nodes-1

    relation_list      -- relations handed to the tree_util tree builders
    arg_pos            -- which argument to use; must be 1 or 2
    wbm                -- object providing index_tokens(), wm and num_units
    max_length         -- T above; at most T tokens and 2T nodes are kept
    all_left_branching -- use tree_util.left_branching_tree instead of
                          tree_util.find_parse_tree

    Returns (embedding_series, children, c_mask, node_mask).
    """
    assert arg_pos == 1 or arg_pos == 2
    n_samples = len(relation_list)
    max_nodes = 2 * max_length

    w_indices = np.zeros((max_nodes, n_samples)).astype("int64")
    c_mask = np.zeros((max_length, n_samples), dtype=config.floatX)
    node_mask = np.zeros((max_nodes, n_samples), dtype=config.floatX)
    # Filled sample-major here; swapped to time-major before returning.
    children = np.zeros((n_samples, max_length, 3), dtype="int64")

    for i, relation in enumerate(relation_list):
        if all_left_branching:
            parse_tree = tree_util.left_branching_tree(relation, arg_pos)
        else:
            parse_tree = tree_util.find_parse_tree(relation, arg_pos)
        # A tree without leaves is unusable: fall back to left branching.
        if len(parse_tree.leaves()) == 0:
            parse_tree = tree_util.left_branching_tree(relation, arg_pos)

        indices = wbm.index_tokens(parse_tree.leaves(), ignore_OOV=False)
        sequence_length = min(max_length, len(indices))
        w_indices[:sequence_length, i] = indices[:sequence_length]

        ordering_matrix, num_leaves = tree_util.reverse_toposort(parse_tree)
        num_nodes = min(max_nodes, ordering_matrix.shape[0])
        # Single-argument call form: same output as the Python 2 statement
        # ``print num_leaves, num_nodes`` and also valid on Python 3.
        print("%s %s" % (num_leaves, num_nodes))

        if num_nodes > num_leaves:
            # Presumably rows from num_leaves onward describe inner nodes
            # -- TODO confirm against tree_util.reverse_toposort.
            num_inner_nodes = num_nodes - num_leaves
            children[i, :num_inner_nodes, :] = ordering_matrix[num_leaves:num_nodes, :]
            c_mask[:num_inner_nodes, i] = 1.0
            node_mask[num_leaves:num_nodes, i] = 1.0

    children = np.swapaxes(children, 0, 1)
    embedding_series = (
        wbm.wm[w_indices.flatten()]
        .reshape([max_nodes, n_samples, wbm.num_units])
        .astype(config.floatX)
    )
    return embedding_series, children, c_mask, node_mask
def prep_tree_srm_arg(relation_list, arg_pos, wbm, max_length,
                      all_left_branching=False, node_label_alphabet=None):
    """Make the matrices from the data required for the tree model

    T = number of time steps (max_length)
    N = number of samples (len(relation_list))
    d = dimensionality of the embedding (wbm.num_units)
    k = number of node labels (len(node_label_alphabet))

    embedding_series: 2T x N x d serrated matrix word embedding for the leaves
    children : T x N x 3 children serrated matrix
    c_mask : T x N masking matrix for children matrix
    node_mask : 2T x N masking matrix for the internal nodes
        (for embedding_series) nice for computing mean h or sum h
    node_label_tensor : 2T x N x k. This masks embedding_series matrix

    relation_list       -- relations handed to the tree_util tree builders
    arg_pos             -- which argument to use; must be 1 or 2
    wbm                 -- object providing index_tokens(), wm and num_units
    max_length          -- T above
    all_left_branching  -- use tree_util.left_branching_tree instead of
                           tree_util.find_parse_tree
    node_label_alphabet -- mapping from node label to column index; labels
                           missing from it are mapped to its 'OTHERS' entry.
                           None (the default) or an empty mapping disables
                           labelling and gives a tensor with k = 0.

    Returns (embedding_series, children, c_mask, node_mask,
    node_label_tensor).
    """
    assert arg_pos == 1 or arg_pos == 2
    # None replaces the former shared ``{}`` default argument.
    if node_label_alphabet is None:
        node_label_alphabet = {}
    num_labels = len(node_label_alphabet)
    n_samples = len(relation_list)
    max_nodes = 2 * max_length

    w_indices = np.zeros((max_nodes, n_samples)).astype('int64')
    c_mask = np.zeros((max_length, n_samples), dtype=config.floatX)
    node_mask = np.zeros((max_nodes, n_samples), dtype=config.floatX)
    # Filled sample-major here; swapped to time-major before returning.
    children = np.zeros((n_samples, max_length, 3), dtype='int64')
    node_label_tensor = np.zeros((max_nodes, n_samples, num_labels),
                                 dtype=config.floatX)

    for i, relation in enumerate(relation_list):
        if all_left_branching:
            parse_tree = tree_util.left_branching_tree(relation, arg_pos)
        else:
            parse_tree = tree_util.find_parse_tree(relation, arg_pos)
        # A tree without leaves is unusable: fall back to left branching.
        if len(parse_tree.leaves()) == 0:
            parse_tree = tree_util.left_branching_tree(relation, arg_pos)

        indices = wbm.index_tokens(parse_tree.leaves(), ignore_OOV=False)
        sequence_length = min(max_length, len(indices))
        w_indices[:sequence_length, i] = indices[:sequence_length]

        ordering_matrix, node_label_list, num_leaves = \
            tree_util.reverse_toposort(parse_tree)
        num_nodes = min(max_nodes, ordering_matrix.shape[0])
        if num_nodes > num_leaves:
            # Presumably rows from num_leaves onward describe inner nodes
            # -- TODO confirm against tree_util.reverse_toposort.
            num_inner_nodes = num_nodes - num_leaves
            children[i, :num_inner_nodes, :] = \
                ordering_matrix[num_leaves:num_nodes, :]
            c_mask[:num_inner_nodes, i] = 1.
            node_mask[num_leaves:num_nodes, i] = 1.

        if num_labels > 0:
            for t, node_label in enumerate(node_label_list):
                if t >= max_nodes:
                    # Nodes past 2T have no row in the tensor.
                    break
                if node_label is None:
                    continue
                if node_label in node_label_alphabet:
                    label_index = node_label_alphabet[node_label]
                else:
                    label_index = node_label_alphabet['OTHERS']
                node_label_tensor[t, i, label_index] = 1.

    children = np.swapaxes(children, 0, 1)
    embedding_series = \
        wbm.wm[w_indices.flatten()].\
        reshape([max_nodes, n_samples, wbm.num_units]).\
        astype(config.floatX)
    return embedding_series, children, c_mask, node_mask, node_label_tensor
def prep_tree_arg(relation_list, arg_pos, all_left_branching=False):
    """Build one parse tree per relation for the given argument position.

    For every relation the tree comes from tree_util.left_branching_tree
    when ``all_left_branching`` is true, otherwise from
    tree_util.find_parse_tree.  A tree with no leaves is replaced by a
    left-branching tree; any other tree is passed through
    tree_util.binarize_tree.  The tree is printed before and after that
    step.

    relation_list      -- relations handed to the tree_util tree builders
    arg_pos            -- argument position forwarded to tree_util
    all_left_branching -- skip the parse lookup and always start from a
                          left-branching tree

    Returns the list of trees, in the same order as ``relation_list``.
    """
    # NOTE(review): the fallback/binarize step is applied after both
    # branches above -- confirm this matches the intended nesting.
    parse_trees = []
    for relation in relation_list:
        if all_left_branching:
            parse_tree = tree_util.left_branching_tree(relation, arg_pos)
        else:
            parse_tree = tree_util.find_parse_tree(relation, arg_pos)
        # Single-argument print calls behave the same on Python 2 and 3.
        print(parse_tree)
        if len(parse_tree.leaves()) == 0:
            print("use left branching tree because parse is empty")
            parse_tree = tree_util.left_branching_tree(relation, arg_pos)
        else:
            parse_tree = tree_util.binarize_tree(parse_tree)
        print(parse_tree)
        parse_trees.append(parse_tree)
    return parse_trees
def prep_tree_arg(relation_list, arg_pos, all_left_branching=False):
    """Return a list with one parse tree per relation in ``relation_list``.

    The starting tree is tree_util.left_branching_tree(relation, arg_pos)
    when ``all_left_branching`` is true and
    tree_util.find_parse_tree(relation, arg_pos) otherwise.  If that tree
    has no leaves it is replaced by a left-branching tree; otherwise it is
    run through tree_util.binarize_tree.  The tree is printed both before
    and after this step.

    relation_list      -- relations handed to the tree_util tree builders
    arg_pos            -- argument position forwarded to tree_util
    all_left_branching -- always start from a left-branching tree

    The returned list keeps the order of ``relation_list``.
    """
    # NOTE(review): the fallback/binarize step runs for both kinds of
    # starting tree -- confirm this matches the intended nesting.
    parse_trees = []
    for relation in relation_list:
        if all_left_branching:
            parse_tree = tree_util.left_branching_tree(relation, arg_pos)
        else:
            parse_tree = tree_util.find_parse_tree(relation, arg_pos)
        # print(x) with one argument is valid on Python 2 and Python 3.
        print(parse_tree)
        if len(parse_tree.leaves()) == 0:
            print('use left branching tree because parse is empty')
            parse_tree = tree_util.left_branching_tree(relation, arg_pos)
        else:
            parse_tree = tree_util.binarize_tree(parse_tree)
        print(parse_tree)
        parse_trees.append(parse_tree)
    return parse_trees
def prep_tree_srm_arg(relation_list, arg_pos, wbm, max_length, all_left_branching=False):
    """Make the padded matrices required by the tree model for one argument.

    T = max_length
    N = number of samples (len(relation_list))
    d = wbm.num_units

    embedding_series : 2T x N x d rows of wbm.wm selected by token index
    children : T x N x 3 rows copied from the ordering matrix
    c_mask : T x N, 1. where the matching row of children is filled
    node_mask : 2T x N, 1. on rows num_leaves..num_nodes-1

    relation_list      -- relations handed to the tree_util tree builders
    arg_pos            -- which argument to use; must be 1 or 2
    wbm                -- object providing index_tokens(), wm and num_units
    max_length         -- T above; at most T tokens and 2T nodes are kept
    all_left_branching -- use tree_util.left_branching_tree instead of
                          tree_util.find_parse_tree

    Returns (embedding_series, children, c_mask, node_mask).
    """
    assert arg_pos == 1 or arg_pos == 2
    n_samples = len(relation_list)
    max_nodes = 2 * max_length

    w_indices = np.zeros((max_nodes, n_samples)).astype('int64')
    c_mask = np.zeros((max_length, n_samples), dtype=config.floatX)
    node_mask = np.zeros((max_nodes, n_samples), dtype=config.floatX)
    # Filled sample-major here; swapped to time-major before returning.
    children = np.zeros((n_samples, max_length, 3), dtype='int64')

    for i, relation in enumerate(relation_list):
        if all_left_branching:
            parse_tree = tree_util.left_branching_tree(relation, arg_pos)
        else:
            parse_tree = tree_util.find_parse_tree(relation, arg_pos)
        # A tree without leaves is unusable: fall back to left branching.
        if len(parse_tree.leaves()) == 0:
            parse_tree = tree_util.left_branching_tree(relation, arg_pos)

        indices = wbm.index_tokens(parse_tree.leaves(), ignore_OOV=False)
        sequence_length = min(max_length, len(indices))
        w_indices[:sequence_length, i] = indices[:sequence_length]

        ordering_matrix, num_leaves = tree_util.reverse_toposort(parse_tree)
        num_nodes = min(max_nodes, ordering_matrix.shape[0])
        # Single-argument call form: same output as the Python 2 statement
        # ``print num_leaves, num_nodes`` and also valid on Python 3.
        print('%s %s' % (num_leaves, num_nodes))

        if num_nodes > num_leaves:
            # Presumably rows from num_leaves onward describe inner nodes
            # -- TODO confirm against tree_util.reverse_toposort.
            num_inner_nodes = num_nodes - num_leaves
            children[i, :num_inner_nodes, :] = \
                ordering_matrix[num_leaves:num_nodes, :]
            c_mask[:num_inner_nodes, i] = 1.
            node_mask[num_leaves:num_nodes, i] = 1.

    children = np.swapaxes(children, 0, 1)
    embedding_series = \
        wbm.wm[w_indices.flatten()].\
        reshape([max_nodes, n_samples, wbm.num_units]).\
        astype(config.floatX)
    return embedding_series, children, c_mask, node_mask