def _all_features(parse_dict, constituent, i, constituents):
    """Run the selected constituent feature extractors and merge their output.

    Every extractor is invoked as ``extractor(parse_dict, constituent, i,
    constituents)`` in the order listed below, and the collected results are
    handed to ``util.mergeFeatures``.
    """
    extractors = (
        CON_POS,
        NT_prev_curr_Path,
        CParent_to_root_path,
        self_category,
        CParent_to_root_path_node_names,
        left_sibling_category,
        NT_to_root_path,
        conn_parent_categoryCtx,
        parent_category,
        conn_rightSiblingCtx,
        CON_Str,
        CON_LStr,
        CON_Cat,
        CON_iRSib,
        NT_Ctx,
        CON_NT_Path,
        CON_NT_Path_iLsib,
    )

    collected = []
    for extractor in extractors:
        collected.append(extractor(parse_dict, constituent, i, constituents))

    # merge features
    return util.mergeFeatures(collected)
def conn_syn(parse_dict, DocID, sent_index, conn_indices):
    """Build the connective/syntactic-category conjunction features.

    The lowercased connective string is joined with ``|`` to each of the
    connective's self, parent, left-sibling and right-sibling categories
    (read from the sentence's parse tree).  Each resulting string is looked
    up in its own ``Explicit_dict`` table through ``get_feature_by_feat`` and
    the four features are merged with ``util.mergeFeatures``.
    """
    # load dict -- one table per conjunction, in output order
    tables = [
        Explicit_dict().conn_self_category_dict,
        Explicit_dict().conn_parent_category_dict,
        Explicit_dict().conn_left_sibling_category_dict,
        Explicit_dict().conn_right_sibling_category_dict,
    ]

    # feature
    connective = dict_util.get_C_String(parse_dict, DocID, sent_index, conn_indices).lower()
    tree_text = parse_dict[DocID]["sentences"][sent_index]["parsetree"].strip()
    tree = Syntax_tree(tree_text)

    categories = [
        dict_util.get_self_category(tree, conn_indices),
        dict_util.get_parent_category(tree, conn_indices),
        dict_util.get_left_sibling_category(tree, conn_indices),
        dict_util.get_right_sibling_category(tree, conn_indices),
    ]

    return util.mergeFeatures([
        get_feature_by_feat(table, "%s|%s" % (connective, category))
        for table, category in zip(tables, categories)
    ])
def _all_features(arg_clauses, clause_index, parse_dict):
    """Build the merged feature vector for one clause (Implicit Arg1 tables).

    Raw values are obtained from ``dict_util`` helpers for the clause at
    ``clause_index`` and looked up in the ``Implicit_arg1_dict`` tables.
    The merged result contains, in order: lowercased verbs, lemma verbs,
    first/last word of the current clause, last word of the previous clause,
    first word of the next clause, the two ``_``-joined boundary pairs, the
    clause position, the clause word count, the ``prev2_pos_lemma_verb``
    value and the prev/curr production rules.
    """
    where = (arg_clauses, clause_index, parse_dict)

    # load dict
    table_lowercase_verbs = Implicit_arg1_dict().dict_lowercase_verbs
    table_lemma_verbs = Implicit_arg1_dict().dict_lemma_verbs
    table_curr_first = Implicit_arg1_dict().dict_curr_first
    table_curr_last = Implicit_arg1_dict().dict_curr_last
    table_prev_last = Implicit_arg1_dict().dict_prev_last
    table_next_first = Implicit_arg1_dict().dict_next_first
    table_prev_last_curr_first = Implicit_arg1_dict().dict_prev_last_curr_first
    table_curr_last_next_first = Implicit_arg1_dict().dict_curr_last_next_first
    table_position = {"left": 1, "middle": 2, "right": 3}
    table_production_rule = Implicit_arg1_dict().dict_prev_curr_CP_production_rule
    table_prev2_pos_lemma_verb = Implicit_arg1_dict().dict_prev2_pos_lemma_verb

    # feature
    verbs_lowercased = dict_util.get_curr_lowercased_verbs(*where)
    verbs_lemma = dict_util.get_curr_lemma_verbs(*where)
    first_word = dict_util.get_curr_first(*where)
    last_word = dict_util.get_curr_last(*where)
    word_before = dict_util.get_prev_last(*where)
    word_after = dict_util.get_next_first(*where)
    left_boundary = "%s_%s" % (word_before, first_word)
    right_boundary = "%s_%s" % (last_word, word_after)
    # the number of words in curr clause
    word_count = len(arg_clauses.clauses[clause_index][0])
    # the position of current clause
    clause_position = dict_util.get_curr_position(*where)
    production_rules = dict_util.get_prev_curr_CP_production_rule(*where)
    prev2_value = dict_util.get_2prev_pos_lemma_verb(*where)

    return util.mergeFeatures([
        get_feature_by_feat_list(table_lowercase_verbs, verbs_lowercased),
        get_feature_by_feat_list(table_lemma_verbs, verbs_lemma),
        get_feature_by_feat(table_curr_first, first_word),
        get_feature_by_feat(table_curr_last, last_word),
        get_feature_by_feat(table_prev_last, word_before),
        get_feature_by_feat(table_next_first, word_after),
        get_feature_by_feat(table_prev_last_curr_first, left_boundary),
        get_feature_by_feat(table_curr_last_next_first, right_boundary),
        get_feature_by_feat(table_position, clause_position),
        Feature("", 1, {"1": word_count}),
        get_feature_by_feat(table_prev2_pos_lemma_verb, prev2_value),
        # production rules
        get_feature_by_feat_list(table_production_rule, production_rules),
    ])
def verbs(relation, parse_dict):
    """Build the verb features for one relation.

    Two features are merged with ``util.mergeFeatures``:

    1. The number of (Arg1 word, Arg2 word) pairs whose lowercased forms are
       both keys of ``dict_verb_classes`` and whose ``#``-separated class
       strings share at least one class.  The count is wrapped as
       ``Feature("", 1, {1: count})``.
    2. The main-verb POS lists of Arg1 and Arg2, concatenated and passed to
       ``get_feature_by_list``.
    """
    # load dict
    dict_verb_classes = Non_Explicit_dict().dict_verb_classes

    # 1. the number of pairs of verbs in Arg1 and Arg2 from same verb class
    Arg1_words = dict_util.get_Arg_Words_List(relation, "Arg1", parse_dict)
    Arg2_words = dict_util.get_Arg_Words_List(relation, "Arg2", parse_dict)

    def class_sets(words):
        # One class set per word occurrence that has a verb-class entry.
        # Duplicates are kept on purpose: every occurrence forms its own pair.
        found = []
        for word in words:
            key = word.lower()
            if key in dict_verb_classes:
                found.append(set(dict_verb_classes[key].split("#")))
        return found

    # Lowercasing and splitting happen once per word instead of once per
    # pair, and the full cross-product list is never materialized.
    arg1_class_sets = class_sets(Arg1_words)
    arg2_class_sets = class_sets(Arg2_words)
    count = sum(
        1
        for c1 in arg1_class_sets
        for c2 in arg2_class_sets
        if not c1.isdisjoint(c2)
    )
    feat_1 = Feature("", 1, {1: count})

    # 2. POS of main verb
    Arg1_MV_POS = dict_util.get_main_verb_pos(relation, "Arg1", parse_dict)
    Arg2_MV_POS = dict_util.get_main_verb_pos(relation, "Arg2", parse_dict)
    MV_POS_feature = get_feature_by_list(Arg1_MV_POS + Arg2_MV_POS)

    return util.mergeFeatures([feat_1, MV_POS_feature])
def all_features(arg_clauses, clause_index, parse_dict):
    """Call each enabled clause-level feature function and merge the results.

    Every enabled function is called as ``f(arg_clauses, clause_index,
    parse_dict)``; the list of results is passed to ``util.mergeFeatures``.
    """
    # NOTE(review): this list was recovered from a copy of the file whose line
    # breaks were lost, so which entries are commented out is a best-effort
    # reconstruction (one entry per line, '#' disabling only the entry that
    # follows it) -- confirm against version control.
    feature_function_list = [
        # lowercase_verbs,
        lemma_verbs,
        curr_first,
        curr_last,
        # prev_last,
        # next_first,
        prev_last_curr_first,
        # curr_last_next_first,
        # production_rule_list,
        # position,
        # # mine
        # con_str,
        con_lstr,
        con_cat,
        # conn_to_root_path,
        # conn_to_root_compressed_path,
        # conn_curr_position
    ]
    features = [feature_function(arg_clauses, clause_index, parse_dict) for feature_function in feature_function_list]
    # merge features
    feature = util.mergeFeatures(features)
    return feature
def syn_syn(parse_dict, DocID, sent_index, conn_indices):
    """Build the pairwise syntactic-category conjunction features.

    The connective's self, parent, left-sibling and right-sibling categories
    are read from the sentence's parse tree and combined pairwise with ``|``
    (self|parent, self|right, self|left, parent|left, parent|right,
    left|right).  Each pair is looked up in its ``Explicit_dict`` table via
    ``get_feature_by_feat`` and the six features are merged.
    """
    # load dict
    table_self_parent = Explicit_dict().self_parent_dict
    table_self_right = Explicit_dict().self_right_dict
    table_self_left = Explicit_dict().self_left_dict
    table_parent_left = Explicit_dict().parent_left_dict
    table_parent_right = Explicit_dict().parent_right_dict
    table_left_right = Explicit_dict().left_right_dict

    # feature
    tree = Syntax_tree(parse_dict[DocID]["sentences"][sent_index]["parsetree"].strip())
    own = dict_util.get_self_category(tree, conn_indices)
    parent = dict_util.get_parent_category(tree, conn_indices)
    left = dict_util.get_left_sibling_category(tree, conn_indices)
    right = dict_util.get_right_sibling_category(tree, conn_indices)

    # (table, first category, second category) in output order
    pairings = (
        (table_self_parent, own, parent),
        (table_self_right, own, right),
        (table_self_left, own, left),
        (table_parent_left, parent, left),
        (table_parent_right, parent, right),
        (table_left_right, left, right),
    )

    return util.mergeFeatures([
        get_feature_by_feat(table, "%s|%s" % (first, second))
        for table, first, second in pairings
    ])
def _all_features(parse_dict, constituent, i, constituents):
    """Apply each listed feature function to one constituent and merge.

    All functions share the call signature ``(parse_dict, constituent, i,
    constituents)``.  Their results, kept in list order, are combined by
    ``util.mergeFeatures``.
    """
    def extract(feature_function):
        return feature_function(parse_dict, constituent, i, constituents)

    selected = [
        CON_POS,
        NT_prev_curr_Path,
        CParent_to_root_path,
        self_category,
        CParent_to_root_path_node_names,
        left_sibling_category,
        NT_to_root_path,
        conn_parent_categoryCtx,
        parent_category,
        conn_rightSiblingCtx,
        CON_Str,
        CON_LStr,
        CON_Cat,
        CON_iRSib,
        NT_Ctx,
        CON_NT_Path,
        CON_NT_Path_iLsib,
    ]

    # merge features
    return util.mergeFeatures(list(map(extract, selected)))
def firstlast_first3(relation, parse_dict):
    """Build the first/last/first-three word features for one relation.

    ``dict_util.get_firstlast_first3`` supplies eight values (Arg1 first,
    Arg1 last, Arg2 first, Arg2 last, Arg1-first/Arg2-first, Arg1-last/
    Arg2-last, Arg1 first3, Arg2 first3).  Each is looked up in the matching
    ``Non_Explicit_dict`` table via ``get_feature_by_feat`` and the eight
    features are merged in that order.
    """
    # load dict -- same order as the values returned below
    tables = (
        Non_Explicit_dict().dict_Arg1_first,
        Non_Explicit_dict().dict_Arg1_last,
        Non_Explicit_dict().dict_Arg2_first,
        Non_Explicit_dict().dict_Arg2_last,
        Non_Explicit_dict().dict_Arg1_first_Arg2_first,
        Non_Explicit_dict().dict_Arg1_last_Arg2_last,
        Non_Explicit_dict().dict_Arg1_first3,
        Non_Explicit_dict().dict_Arg2_first3,
    )

    # feature -- unpacked by name so a result of the wrong length fails here
    (
        arg1_first,
        arg1_last,
        arg2_first,
        arg2_last,
        both_first,
        both_last,
        arg1_first3,
        arg2_first3,
    ) = dict_util.get_firstlast_first3(relation, parse_dict)
    values = (
        arg1_first,
        arg1_last,
        arg2_first,
        arg2_last,
        both_first,
        both_last,
        arg1_first3,
        arg2_first3,
    )

    return util.mergeFeatures([
        get_feature_by_feat(table, value) for table, value in zip(tables, values)
    ])
def _all_features(arg_clauses, clause_index, parse_dict):
    """Build the merged feature vector for one clause (Implicit Arg2 tables).

    Raw values come from ``dict_util`` helpers called with ``(arg_clauses,
    clause_index, parse_dict)`` and are looked up in the
    ``Implicit_arg2_dict`` tables.  Output order: lowercased verbs, lemma
    verbs, current first/last word, previous last word, next first word, the
    two ``_``-joined boundary pairs, clause position, clause word count,
    ``prev2_pos_lemma_verb`` and the prev/curr production rules.
    """
    # load dict
    verbs_lower_table = Implicit_arg2_dict().dict_lowercase_verbs
    verbs_lemma_table = Implicit_arg2_dict().dict_lemma_verbs
    curr_first_table = Implicit_arg2_dict().dict_curr_first
    curr_last_table = Implicit_arg2_dict().dict_curr_last
    prev_last_table = Implicit_arg2_dict().dict_prev_last
    next_first_table = Implicit_arg2_dict().dict_next_first
    prev_last_curr_first_table = Implicit_arg2_dict().dict_prev_last_curr_first
    curr_last_next_first_table = Implicit_arg2_dict().dict_curr_last_next_first
    position_table = {"left": 1, "middle": 2, "right": 3}
    production_rule_table = Implicit_arg2_dict().dict_prev_curr_CP_production_rule
    prev2_table = Implicit_arg2_dict().dict_prev2_pos_lemma_verb

    def probe(getter):
        # Every dict_util getter used here takes the same three arguments.
        return getter(arg_clauses, clause_index, parse_dict)

    # feature
    verbs_lower = probe(dict_util.get_curr_lowercased_verbs)
    verbs_lemma = probe(dict_util.get_curr_lemma_verbs)
    curr_first = probe(dict_util.get_curr_first)
    curr_last = probe(dict_util.get_curr_last)
    prev_last = probe(dict_util.get_prev_last)
    next_first = probe(dict_util.get_next_first)
    prev_last_curr_first = "%s_%s" % (prev_last, curr_first)
    curr_last_next_first = "%s_%s" % (curr_last, next_first)
    # the number of words in current clause
    word_count = len(arg_clauses.clauses[clause_index][0])
    # the position of current clause
    position = probe(dict_util.get_curr_position)
    production_rules = probe(dict_util.get_prev_curr_CP_production_rule)
    prev2 = probe(dict_util.get_2prev_pos_lemma_verb)

    features = [
        get_feature_by_feat_list(verbs_lower_table, verbs_lower),
        get_feature_by_feat_list(verbs_lemma_table, verbs_lemma),
    ]
    single_valued = (
        (curr_first_table, curr_first),
        (curr_last_table, curr_last),
        (prev_last_table, prev_last),
        (next_first_table, next_first),
        (prev_last_curr_first_table, prev_last_curr_first),
        (curr_last_next_first_table, curr_last_next_first),
        (position_table, position),
    )
    for table, value in single_valued:
        features.append(get_feature_by_feat(table, value))
    features.append(Feature("", 1, {"1": word_count}))
    features.append(get_feature_by_feat(prev2_table, prev2))
    # production rules
    features.append(get_feature_by_feat_list(production_rule_table, production_rules))

    return util.mergeFeatures(features)
def all_features(parse_dict, constituent, i, constituents):
    """Build the merged feature vector for one constituent and its connective.

    Uses ``constituent.syntax_tree`` and ``constituent.connective`` together
    with ``dict_util`` helpers.  Output order: connective string, lowercased
    connective string, constituent context, connective-to-constituent path,
    the same path suffixed with ``:>1`` / ``:<=1`` depending on the left
    sibling count, connective category, left sibling count, right sibling
    count and the connective/constituent position.  ``i`` and
    ``constituents`` are accepted but not read here.
    """
    tree = constituent.syntax_tree
    category_by_name = Connectives_dict().conn_category
    conn = constituent.connective

    # load dict
    table_CON_Str = NT_dict().dict_CON_Str
    table_CON_LStr = NT_dict().dict_CON_LStr
    table_NT_Ctx = NT_dict().dict_NT_Ctx
    table_CON_NT_Path = NT_dict().dict_CON_NT_Path
    table_CON_NT_Path_iLsib = NT_dict().dict_CON_NT_Path_iLsib

    # feature
    token_indices = conn.token_indices
    doc_id = conn.DocID
    sentence_index = conn.sent_index
    node = dict_util.get_conn_node(tree, token_indices)

    conn_text = dict_util.get_CON_Str(parse_dict, doc_id, sentence_index, token_indices)
    conn_text_lower = conn_text.lower()
    conn_kind = category_by_name[conn.name]
    left_sibling_count = dict_util.get_CON_iLSib(tree, node)
    right_sibling_count = dict_util.get_CON_iRSib(tree, node)
    context = dict_util.get_NT_Ctx(constituent)
    path = dict_util.get_CON_NT_Path(node, constituent)
    side = dict_util.get_CON_NT_Position(node, constituent)
    path_with_sibling_flag = path + (":>1" if left_sibling_count > 1 else ":<=1")

    return util.mergeFeatures([
        get_feature({}, table_CON_Str, conn_text),
        get_feature({}, table_CON_LStr, conn_text_lower),
        get_feature({}, table_NT_Ctx, context),
        get_feature({}, table_CON_NT_Path, path),
        get_feature({}, table_CON_NT_Path_iLsib, path_with_sibling_flag),
        # cat
        get_feature({}, {"subordinator": 1, "coordinator": 2, "adverbial": 3}, conn_kind),
        # number
        Feature("", 1, {1: left_sibling_count}),
        Feature("", 1, {1: right_sibling_count}),
        # position
        get_feature({}, {"right": 1, "left": 2}, side),
    ])
def dependency_rules(relation, parse_dict):
    """Build the dependency-rule features for Arg1, Arg2 and their overlap.

    The rule lists of both arguments come from
    ``dict_util.get_Arg_dependency_rules``; the overlap is the set
    intersection of the two lists.  All three lists are looked up in the same
    ``dict_dependency_rules`` table via ``get_feature_by_feat_list`` and
    merged in the order Arg1, Arg2, overlap.
    """
    # load dict
    rule_table = Non_Explicit_dict().dict_dependency_rules

    # feature
    arg1_rules = dict_util.get_Arg_dependency_rules(relation, "Arg1", parse_dict)
    arg2_rules = dict_util.get_Arg_dependency_rules(relation, "Arg2", parse_dict)
    shared_rules = list(set(arg1_rules) & set(arg2_rules))

    return util.mergeFeatures([
        get_feature_by_feat_list(rule_table, rules)
        for rules in (arg1_rules, arg2_rules, shared_rules)
    ])
def all_features(arg_clauses, clause_index, parse_dict):
    """Run the selected clause feature functions and merge their results.

    Each function is called as ``f(arg_clauses, clause_index, parse_dict)``
    in list order; the results are combined with ``util.mergeFeatures``.
    """
    extractors = (
        prev_curr_CP_production_rule,
        curr_last,
        is_NNP_WP,
        is_curr_NNP_prev_PRP_or_NNP,
        clause_word_num,
        prev2_pos_lemma_verb,
        lemma_verbs,
    )

    collected = []
    for extractor in extractors:
        collected.append(extractor(arg_clauses, clause_index, parse_dict))

    # merge features
    return util.mergeFeatures(collected)
def all_features(arg_clauses, clause_index, parse_dict):
    """Apply every listed clause feature function and merge the output.

    The functions share the signature ``(arg_clauses, clause_index,
    parse_dict)``.  Results keep list order and are passed to
    ``util.mergeFeatures``.
    """
    def extract(feature_function):
        return feature_function(arg_clauses, clause_index, parse_dict)

    selected = [
        production_rule_list,
        curr_first,
        curr_first_prev_last_parse_path,
        next_first,
        conn_to_root_path,
        con_str,
        prev_last,
        curr_last_next_first,
        con_lstr,
        conn_connCtx,
        conn_to_root_compressed_path,
        CPOS,
        CParent_to_root_path_node_names,
        con_cat,
    ]

    # merge features
    return util.mergeFeatures(list(map(extract, selected)))
def all_features(arg_clauses, clause_index, parse_dict):
    """Collect the selected clause features and merge them into one.

    Each entry of the tuple below is called with ``(arg_clauses,
    clause_index, parse_dict)``; the results, in tuple order, are passed to
    ``util.mergeFeatures``.
    """
    call_args = (arg_clauses, clause_index, parse_dict)
    extractors = (
        prev_curr_CP_production_rule,
        is_NNP_WP,
        is_curr_NNP_prev_PRP_or_NNP,
        clause_word_num,
        prev2_pos_lemma_verb,
        next_first,
        prev_last,
    )

    # merge features
    return util.mergeFeatures([extractor(*call_args) for extractor in extractors])
def modality(relation, parse_dict):
    """Build the modality features for one relation.

    A modality vector is computed for the word list of each argument with
    ``dict_util.get_modality_vec``, plus their ``util.cross_product``.  The
    three vectors are converted with ``get_feature_by_list`` and merged in
    the order Arg1, Arg2, cross product.
    """
    # feature
    arg1_words = dict_util.get_Arg_Words_List(relation, "Arg1", parse_dict)
    arg2_words = dict_util.get_Arg_Words_List(relation, "Arg2", parse_dict)

    arg1_modality = dict_util.get_modality_vec(arg1_words)
    arg2_modality = dict_util.get_modality_vec(arg2_words)
    crossed = util.cross_product(arg1_modality, arg2_modality)

    return util.mergeFeatures([
        get_feature_by_list(vector)
        for vector in (arg1_modality, arg2_modality, crossed)
    ])
def all_features(relation, parse_dict):
    """Call each enabled relation-level feature function and merge the results.

    Every enabled function is called as ``f(relation, parse_dict)``; the list
    of results is passed to ``mergeFeatures``.
    """
    # NOTE(review): this list was recovered from a copy of the file whose line
    # breaks were lost, so which entries are commented out is a best-effort
    # reconstruction (one entry per line, '#' disabling only the entry that
    # follows it) -- confirm against version control.
    feature_function_list = [
        # word_pairs,
        production_rules,
        dependency_rules,
        firstlast_first3,
        # polarity,
        modality,
        verbs,
        brown_cluster_pair,
        Inquirer,
        MPQA_polarity,
    ]
    features = [feature_function(relation, parse_dict) for feature_function in feature_function_list]
    # merge features
    # NOTE(review): called unqualified here, whereas the other feature
    # functions call util.mergeFeatures -- confirm mergeFeatures is imported
    # by name in this module.
    feature = mergeFeatures(features)
    return feature
def _all_features(parse_dict, connective):
    """Run all connective feature functions for one connective and merge.

    ``DocID``, ``sent_index`` and ``token_indices`` are read from
    ``connective``; every feature function is then called as
    ``f(parse_dict, DocID, sent_index, conn_indices)``.  The functions are
    grouped by origin below and run in that order.
    """
    DocID = connective.DocID
    sent_index = connective.sent_index
    conn_indices = connective.token_indices

    # Z.lin
    lin_group = [CString, CPOS, C_Prev, CLString]
    # Pitler
    pitler_group = [
        self_category,
        parent_category,
        left_sibling_category,
        right_sibling_category,
    ]
    # conn - syn
    conn_syn_group = [
        conn_self_category,
        conn_parent_category,
        conn_left_sibling_category,
        conn_right_sibling_category,
    ]
    # syn - syn
    syn_syn_group = [
        self_parent,
        self_right,
        self_left,
        parent_left,
        parent_right,
        left_right,
    ]
    # mine
    extra_group = [
        conn_parent_category_ctx,
        as_prev_conn,
        as_prev_connPOS,
        when_prev_conn,
        when_prev_connPOS,
    ]

    collected = []
    for extractor in lin_group + pitler_group + conn_syn_group + syn_syn_group + extra_group:
        collected.append(extractor(parse_dict, DocID, sent_index, conn_indices))

    # merge features
    return util.mergeFeatures(collected)
def all_features(arg_clauses, clause_index, parse_dict):
    """Run the selected clause feature functions and merge their results.

    Each function is called as ``f(arg_clauses, clause_index, parse_dict)``
    in the order given; ``util.mergeFeatures`` combines the results.
    """
    extractors = (
        production_rule_list,
        curr_first,
        curr_first_prev_last_parse_path,
        next_first,
        conn_to_root_path,
        con_str,
        prev_last,
        curr_last_next_first,
        con_lstr,
        conn_connCtx,
        conn_to_root_compressed_path,
        CPOS,
        CParent_to_root_path_node_names,
        con_cat,
    )

    collected = []
    for extractor in extractors:
        collected.append(extractor(arg_clauses, clause_index, parse_dict))

    # merge features
    return util.mergeFeatures(collected)
def Arg_word2vec(relation, parse_dict):
    """Build averaged word-vector features for Arg1 and Arg2.

    For each argument the distinct lowercased lemma words are fetched, the
    vectors of those present in ``word2vec_dict`` are summed onto a
    300-element zero list with ``util.vec_plus_vec`` and divided by the
    number of words found (left as zeros when none is found).  Both vectors
    are converted with ``get_feature_by_list`` and merged, Arg1 first.
    """
    # load dict
    embeddings = Non_Explicit_dict().word2vec_dict

    def mean_vector(words):
        # Sum the vectors of known words, then average over the hit count.
        total = [0.0] * 300
        hits = 0
        for token in words:
            if token in embeddings:
                total = util.vec_plus_vec(total, embeddings[token])
                hits += 1
        if hits != 0:
            total = [component / hits for component in total]
        return total

    # feature
    arg1_words = dict_util._get_lower_case_lemma_words(relation, "Arg1", parse_dict)
    arg2_words = dict_util._get_lower_case_lemma_words(relation, "Arg2", parse_dict)
    arg1_unique = list(set(arg1_words))
    arg2_unique = list(set(arg2_words))

    arg1_mean = mean_vector(arg1_unique)
    arg2_mean = mean_vector(arg2_unique)

    return util.mergeFeatures([
        get_feature_by_list(arg1_mean),
        get_feature_by_list(arg2_mean),
    ])
def Arg_word2vec(relation, parse_dict):
    """Return merged mean word-vector features for the two arguments.

    The distinct lowercased lemma words of Arg1 and Arg2 are looked up in
    ``word2vec_dict``.  Vectors of the words found are accumulated with
    ``util.vec_plus_vec`` starting from 300 zeros, then divided by the number
    of words found; an argument with no known word keeps the zero vector.
    The two resulting lists go through ``get_feature_by_list`` and are
    merged in the order Arg1, Arg2.
    """
    # load dict
    word_vectors = Non_Explicit_dict().word2vec_dict

    # feature
    arg1_words = dict_util._get_lower_case_lemma_words(relation, "Arg1", parse_dict)
    arg2_words = dict_util._get_lower_case_lemma_words(relation, "Arg2", parse_dict)
    distinct_words = (list(set(arg1_words)), list(set(arg2_words)))

    averaged = []
    for words in distinct_words:
        accumulator = [0.0] * 300
        known = 0
        for word in words:
            if word not in word_vectors:
                continue
            accumulator = util.vec_plus_vec(accumulator, word_vectors[word])
            known += 1
        # average
        if known != 0:
            accumulator = [value / known for value in accumulator]
        averaged.append(accumulator)

    return util.mergeFeatures([get_feature_by_list(vector) for vector in averaged])
def all_features(parse_dict, DocID, sent_index, conn_indices):
    """Assemble the argument-position feature vector for one connective.

    Raw values come from ``dict_util`` helpers called with ``(parse_dict,
    DocID, sent_index, conn_indices)`` and are looked up in the
    ``Arg_position_dict`` tables.  Output order: connective string, position
    feature (``C_Position_feature``), connective POS, prev1, prev1 POS,
    prev1|C, prev1POS|CPOS, prev2, prev2 POS, prev2|C, prev2POS|CPOS,
    CPOS|next1POS, next2 and the connective-to-root path.
    """
    where = (parse_dict, DocID, sent_index, conn_indices)

    # load dict
    table_CString = Arg_position_dict().dict_CString
    table_CPOS = Arg_position_dict().dict_CPOS
    table_prev1 = Arg_position_dict().dict_prev1
    table_prev1POS = Arg_position_dict().dict_prev1POS
    table_prev1_C = Arg_position_dict().dict_prev1_C
    table_prev1POS_CPOS = Arg_position_dict().dict_prev1POS_CPOS
    table_prev2 = Arg_position_dict().dict_prev2
    table_prev2POS = Arg_position_dict().dict_prev2POS
    table_prev2_C = Arg_position_dict().dict_prev2_C
    table_prev2POS_CPOS = Arg_position_dict().dict_prev2POS_CPOS
    table_conn_to_root_path = Arg_position_dict().dict_conn_to_root_path
    table_next1POS_CPOS = Arg_position_dict().dict_next1POS_CPOS
    table_next2 = Arg_position_dict().dict_next2

    # feature
    conn_text = dict_util.get_C_String(*where)
    conn_pos = dict_util.get_CPOS(*where)
    prev1_word = dict_util.get_prev1(*where)
    prev1_pos = dict_util.get_prev1POS(*where)
    prev2_word = dict_util.get_prev2(*where)
    prev2_pos = dict_util.get_prev2POS(*where)
    # Only the POS of next1 and the word of next2 are used below.
    _, next1_pos = dict_util.get_next1_next1POS(*where)
    next2_word, _ = dict_util.get_next2_next2POS(*where)
    root_path = dict_util.get_conn_to_root_path(*where)

    return util.mergeFeatures([
        get_feature({}, table_CString, conn_text),
        C_Position_feature(*where),  # position feature
        get_feature({}, table_CPOS, conn_pos),
        get_feature({}, table_prev1, prev1_word),
        get_feature({}, table_prev1POS, prev1_pos),
        get_feature({}, table_prev1_C, "%s|%s" % (prev1_word, conn_text)),
        get_feature({}, table_prev1POS_CPOS, "%s|%s" % (prev1_pos, conn_pos)),
        get_feature({}, table_prev2, prev2_word),
        get_feature({}, table_prev2POS, prev2_pos),
        get_feature({}, table_prev2_C, "%s|%s" % (prev2_word, conn_text)),
        get_feature({}, table_prev2POS_CPOS, "%s|%s" % (prev2_pos, conn_pos)),
        # The value is ordered connective POS first, then next1 POS.
        get_feature({}, table_next1POS_CPOS, "%s|%s" % (conn_pos, next1_pos)),
        get_feature({}, table_next2, next2_word),
        get_feature_by_feat(table_conn_to_root_path, root_path),
    ])
def all_features(parse_dict, DocID, sent_index, conn_indices):
    # Build the merged feature vector for the connective made of the tokens
    # `conn_indices` in sentence `sent_index` of document `DocID`.
    #
    # Steps, in order:
    #   1. create one empty accumulator dict per get_feature() call;
    #   2. load the feature-index dictionaries from Connectives_dict();
    #   3. derive the raw string values (POS tags, neighbouring words,
    #      parse-tree categories and paths);
    #   4. turn every value into a feature and merge them with
    #      util.mergeFeatures().
    #
    # Whenever the parse tree is missing (syntax_tree.tree == None) the
    # tree-based values fall back to the literal "NONE_TREE".

    # feat dict
    '''Z.Lin'''
    feat_dict_CPOS_dict = {}
    feat_dict_prev_C_dict = {}
    feat_dict_prevPOS_dict = {}
    feat_dict_prevPOS_CPOS_dict = {}
    feat_dict_C_next_dict = {}
    feat_dict_nextPOS_dict = {}
    feat_dict_CPOS_nextPOS_dict = {}
    feat_dict_CParent_to_root_path_dict = {}
    feat_dict_compressed_CParent_to_root_path_dict = {}

    '''Pitler'''
    feat_dict_self_category_dict = {}
    feat_dict_parent_category_dict = {}
    feat_dict_left_sibling_category_dict = {}
    feat_dict_right_sibling_category_dict = {}

    ''' conn_syn '''
    feat_dict_conn_self_category_dict = {}
    feat_dict_conn_parent_category_dict = {}
    feat_dict_conn_left_sibling_category_dict = {}
    feat_dict_conn_right_sibling_category_dict = {}

    ''' syn_syn '''
    feat_dict_self_parent = {}
    feat_dict_self_right = {}
    feat_dict_self_left = {}
    feat_dict_parent_left = {}
    feat_dict_parent_right = {}
    feat_dict_left_right = {}

    #dict
    '''Z.Lin'''
    CPOS_dict = Connectives_dict().cpos_dict
    prev_C_dict = Connectives_dict().prev_C_dict
    prevPOS_dict = Connectives_dict().prevPOS_dict
    prevPOS_CPOS_dict = Connectives_dict().prevPOS_CPOS_dict
    C_next_dict = Connectives_dict().C_next_dict
    nextPOS_dict = Connectives_dict().nextPOS_dict
    CPOS_nextPOS_dict = Connectives_dict().CPOS_nextPOS_dict
    CParent_to_root_path_dict = Connectives_dict().CParent_to_root_path_dict
    compressed_CParent_to_root_path_dict = Connectives_dict().compressed_CParent_to_root_path_dict

    '''Pitler'''
    self_category_dict = Connectives_dict().self_category_dict
    parent_category_dict = Connectives_dict().parent_category_dict
    left_sibling_category_dict = Connectives_dict().left_sibling_category_dict
    right_sibling_category_dict = Connectives_dict().right_sibling_category_dict

    ''' conn_syn '''
    conn_self_category_dict = Connectives_dict().conn_self_category_dict
    conn_parent_category_dict = Connectives_dict().conn_parent_category_dict
    conn_left_sibling_category_dict = Connectives_dict().conn_left_sibling_category_dict
    conn_right_sibling_category_dict = Connectives_dict().conn_right_sibling_category_dict

    ''' syn_syn '''
    self_parent_dict = Connectives_dict().self_parent_dict
    self_right_dict = Connectives_dict().self_right_dict
    self_left_dict = Connectives_dict().self_left_dict
    parent_left_dict = Connectives_dict().parent_left_dict
    parent_right_dict = Connectives_dict().parent_right_dict
    left_right_dict = Connectives_dict().left_right_dict

    ''' mine '''
    dict_conn_lower_case = Connectives_dict().dict_conn_lower_case
    dict_conn = Connectives_dict().dict_conn
    dict_CParent_to_root_path_node_names = Connectives_dict().dict_CParent_to_root_path_node_names
    dict_conn_rightSiblingCtx = Connectives_dict().dict_conn_rightSiblingCtx
    dict_conn_parent_category_Ctx = Connectives_dict().dict_conn_parent_category_Ctx

    ''' c pos '''
    # POS tags of all connective tokens, joined with "_".
    pos_tag_list = []
    for conn_index in conn_indices:
        pos_tag_list.append(parse_dict[DocID]["sentences"][sent_index]["words"][conn_index][1]["PartOfSpeech"])
    CPOS = "_".join(pos_tag_list)

    ''' prev '''
    # Word before the first connective token.  At the start of a sentence
    # this is the last word (index -1) of the previous sentence, or "NONE"
    # when there is no previous sentence.
    flag = 0
    prev_index = conn_indices[0] - 1
    prev_sent_index = sent_index
    if prev_index < 0:
        prev_index = -1
        prev_sent_index -= 1
        if prev_sent_index < 0:
            flag = 1

    if flag == 1 :
        prev = "NONE"
    else:
        prev = parse_dict[DocID]["sentences"][prev_sent_index]["words"][prev_index][0]

    ''' conn_name '''
    # Surface form of the connective: its tokens joined by single spaces.
    conn_name = " ".join([parse_dict[DocID]["sentences"][sent_index]["words"][word_token][0] \
                  for word_token in conn_indices ])

    '''prevPOS'''
    if prev == "NONE":
        prevPOS = "NONE"
    else:
        prevPOS = parse_dict[DocID]["sentences"][prev_sent_index]["words"][prev_index][1]["PartOfSpeech"]

    '''next'''
    # Word after the last connective token.  At the end of a sentence this is
    # the first word of the next sentence, or "NONE" when this is the last
    # sentence of the document.
    sent_count = len(parse_dict[DocID]["sentences"])
    sent_length = len(parse_dict[DocID]["sentences"][sent_index]["words"])

    flag = 0
    next_index = conn_indices[-1] + 1
    next_sent_index = sent_index
    if next_index >= sent_length:
        next_sent_index += 1
        next_index = 0
        if next_sent_index >= sent_count:
            flag = 1

    if flag == 1:
        next = "NONE"
    else:
        next = parse_dict[DocID]["sentences"][next_sent_index]["words"][next_index][0]

    ''' next pos '''
    if next == "NONE":
        nextPOS = "NONE"
    else:
        nextPOS = parse_dict[DocID]["sentences"][next_sent_index]["words"][next_index][1]["PartOfSpeech"]

    parse_tree = parse_dict[DocID]["sentences"][sent_index]["parsetree"].strip()
    syntax_tree = Syntax_tree(parse_tree)

    ''' c parent to root '''
    # For every connective token: path from the token's parent node to the
    # root; the per-token paths are joined with "&".
    if syntax_tree.tree == None:
        cparent_to_root_path = "NONE_TREE"
    else:
        cparent_to_root_path = ""
        for conn_index in conn_indices:
            conn_node = syntax_tree.get_leaf_node_by_token_index(conn_index)
            conn_parent_node = conn_node.up
            cparent_to_root_path += syntax_tree.get_node_path_to_root(conn_parent_node) + "&"
        # drop the trailing separator
        if cparent_to_root_path[-1] == "&":
            cparent_to_root_path = cparent_to_root_path[:-1]

    ''' compressed c parent to root '''
    # Same paths as above, each passed through util.get_compressed_path()
    # before being joined with "&".
    if syntax_tree.tree == None:
        compressed_path = "NONE_TREE"
    else:
        compressed_path = ""
        for conn_index in conn_indices:
            conn_node = syntax_tree.get_leaf_node_by_token_index(conn_index)
            conn_parent_node = conn_node.up

            path = syntax_tree.get_node_path_to_root(conn_parent_node)

            compressed_path += util.get_compressed_path(path) + "&"
        # drop the trailing separator
        if compressed_path[-1] == "&":
            compressed_path = compressed_path[:-1]

    ''' Pitler '''
    # Names of the connective's self / parent / left-sibling / right-sibling
    # category nodes.  A missing parent is reported as "ROOT", a missing
    # sibling as "NONE".
    if syntax_tree.tree == None:
        self_category = "NONE_TREE"
    else:
        self_category = syntax_tree.get_self_category_node_by_token_indices(conn_indices).name

    if syntax_tree.tree == None:
        parent_category = "NONE_TREE"
    else:
        parent_category_node = syntax_tree.get_parent_category_node_by_token_indices(conn_indices)
        if parent_category_node == None:
            parent_category = "ROOT"
        else:
            parent_category = parent_category_node.name

    if syntax_tree.tree == None:
        left_sibling_category = "NONE_TREE"
    else:
        left_sibling_category_node = syntax_tree.get_left_sibling_category_node_by_token_indices(conn_indices)
        if left_sibling_category_node == None:
            left_sibling_category = "NONE"
        else:
            left_sibling_category = left_sibling_category_node.name

    # NOTE: right_sibling_category_node is only bound when the tree exists; it
    # is read again further down, guarded by the same tree check.
    if syntax_tree.tree == None:
        right_sibling_category = "NONE_TREE"
    else:
        right_sibling_category_node = syntax_tree.get_right_sibling_category_node_by_token_indices(conn_indices)
        if right_sibling_category_node == None:
            right_sibling_category = "NONE"
        else:
            right_sibling_category = right_sibling_category_node.name

    # "|"-joined conjunctions of the values computed above.
    prev_C = "%s|%s" % (prev, conn_name)
    prePOS_CPOS = "%s|%s" % (prevPOS, CPOS)
    C_next = "%s|%s" % (conn_name, next)
    CPOS_nextPOS = "%s|%s" % (CPOS, nextPOS)

    conn_self_category = "%s|%s" % (conn_name, self_category)
    conn_parent_category = "%s|%s" % (conn_name, parent_category)
    conn_left_sibling_category = "%s|%s" % (conn_name, left_sibling_category)
    conn_right_sibling_category = "%s|%s" % (conn_name, right_sibling_category)

    self_parent = "%s|%s" % (self_category, parent_category)
    self_right = "%s|%s" % (self_category, right_sibling_category)
    self_left = "%s|%s" % (self_category, left_sibling_category)
    parent_left = "%s|%s" % (parent_category, left_sibling_category)
    parent_right = "%s|%s" % (parent_category, right_sibling_category)
    left_right = "%s|%s" % (left_sibling_category, right_sibling_category)

    '''--- mine ---'''
    conn_lower_case = conn_name.lower()
    # prevPOS_C = "%s|%s" % (prevPOS, conn_name.lower())

    # Parent-to-root paths of all connective tokens joined with "-->"; the
    # string is split on "-->" again when the feature is built below.
    if syntax_tree.tree == None:
        _path = "NONE_TREE"
    else:
        _path = ""
        for conn_index in conn_indices:
            conn_node = syntax_tree.get_leaf_node_by_token_index(conn_index)
            conn_parent_node = conn_node.up
            _path += syntax_tree.get_node_path_to_root(conn_parent_node) + "-->"
        # drop the trailing separator
        if _path[-3:] == "-->":
            _path = _path[:-3]

    # conn + connCtx
    # NOTE: conn_connCtx is computed but not added to `features` below.
    if syntax_tree.tree == None:
        connCtx = "NONE_TREE"
    else:
        conn_node = syntax_tree.get_self_category_node_by_token_indices(conn_indices)
        connCtx = dict_util.get_node_Ctx(conn_node, syntax_tree)

    conn_connCtx = "%s|%s" % (conn_name, connCtx)

    # conn + right sibling ctx
    if syntax_tree.tree == None:
        rightSiblingCtx = "NONE_TREE"
    else:
        rightSibling_node = syntax_tree.get_right_sibling_category_node_by_token_indices(conn_indices)
        rightSiblingCtx = dict_util.get_node_linked_Ctx(rightSibling_node, syntax_tree)

    conn_rightSiblingCtx = "%s|%s" % (conn_name, rightSiblingCtx)

    # conn _ left sibling ctx
    # NOTE: leftSiblingCtx is computed but not used afterwards.
    if syntax_tree.tree == None:
        leftSiblingCtx = "NONE_TREE"
    else:
        leftSibling_node = syntax_tree.get_left_sibling_category_node_by_token_indices(conn_indices)
        leftSiblingCtx = dict_util.get_node_linked_Ctx(leftSibling_node, syntax_tree)

    # conn parent category ctx
    if syntax_tree.tree == None:
        parent_categoryCtx = "NONE_TREE"
    else:
        parent_category_node = syntax_tree.get_parent_category_node_by_token_indices(conn_indices)
        parent_categoryCtx = dict_util.get_node_linked_Ctx(parent_category_node, syntax_tree)

    conn_parent_categoryCtx = "%s|%s" % (conn_name, parent_categoryCtx)

    # Assemble the features; the append order fixes the merged layout.
    features = []
    '''Z.Lin'''
    features.append(get_feature(feat_dict_CPOS_dict, CPOS_dict, CPOS))
    features.append(get_feature(feat_dict_prev_C_dict, prev_C_dict, prev_C))
    features.append(get_feature(feat_dict_prevPOS_dict, prevPOS_dict, prevPOS))
    features.append(get_feature(feat_dict_prevPOS_CPOS_dict, prevPOS_CPOS_dict, prePOS_CPOS ))
    features.append(get_feature(feat_dict_C_next_dict, C_next_dict, C_next))
    features.append(get_feature(feat_dict_nextPOS_dict, nextPOS_dict, nextPOS))
    features.append(get_feature(feat_dict_CPOS_nextPOS_dict, CPOS_nextPOS_dict, CPOS_nextPOS))
    features.append(get_feature(feat_dict_CParent_to_root_path_dict,CParent_to_root_path_dict, cparent_to_root_path ))
    features.append(get_feature(feat_dict_compressed_CParent_to_root_path_dict, compressed_CParent_to_root_path_dict, compressed_path))

    ''' pitler '''
    features.append(get_feature(feat_dict_self_category_dict, self_category_dict, self_category))
    features.append(get_feature(feat_dict_parent_category_dict, parent_category_dict, parent_category))
    features.append(get_feature(feat_dict_left_sibling_category_dict, left_sibling_category_dict, left_sibling_category))
    features.append(get_feature(feat_dict_right_sibling_category_dict, right_sibling_category_dict, right_sibling_category))

    # Key 1 is set when the right sibling node, or any of its descendants,
    # is named "VP" or "S"; otherwise the dict stays empty.
    feat_dict_is_right_sibling_contains_VP = {}
    if syntax_tree.tree != None and right_sibling_category_node != None:
        T = right_sibling_category_node.get_descendants()
        T.append(right_sibling_category_node)
        for node in T:
            if node.name == "VP" or node.name == "S":
                feat_dict_is_right_sibling_contains_VP[1] = 1
                break
    features.append(Feature("", 1, feat_dict_is_right_sibling_contains_VP))

    ''' conn-syn '''
    features.append(get_feature(feat_dict_conn_self_category_dict, conn_self_category_dict, conn_self_category))
    features.append(get_feature(feat_dict_conn_parent_category_dict, conn_parent_category_dict, conn_parent_category))
    features.append(get_feature(feat_dict_conn_left_sibling_category_dict, conn_left_sibling_category_dict, conn_left_sibling_category))
    features.append(get_feature(feat_dict_conn_right_sibling_category_dict, conn_right_sibling_category_dict, conn_right_sibling_category))

    ''' syn-syn '''
    features.append(get_feature(feat_dict_self_parent, self_parent_dict, self_parent))
    features.append(get_feature(feat_dict_self_right,self_right_dict, self_right ))
    features.append(get_feature(feat_dict_self_left, self_left_dict, self_left))
    features.append(get_feature(feat_dict_parent_left, parent_left_dict, parent_left))
    features.append(get_feature(feat_dict_parent_right, parent_right_dict, parent_right))
    features.append(get_feature(feat_dict_left_right,left_right_dict, left_right))

    ''' mine '''
    features.append(get_feature_by_feat(dict_conn_lower_case, conn_lower_case))
    features.append(get_feature_by_feat(dict_conn, conn_name))
    features.append(get_feature_by_feat_list(dict_CParent_to_root_path_node_names, _path.split("-->")))
    features.append(get_feature_by_feat(dict_conn_rightSiblingCtx, conn_rightSiblingCtx))
    features.append(get_feature_by_feat(dict_conn_parent_category_Ctx, conn_parent_categoryCtx))

    return util.mergeFeatures(features)
def all_features(parse_dict, constituent, i, constituents):
    """Assemble the merged feature vector for one constituent.

    The features cover the connective attached to ``constituent`` (surface
    string, lower-cased string, category, left/right sibling values) and the
    relation between the connective node and the constituent (context, path,
    path bucketed by the left-sibling value, and position).

    ``i`` and ``constituents`` are not read by this function.

    Returns the result of ``util.mergeFeatures`` over the individual
    features, in a fixed order.
    """
    tree = constituent.syntax_tree
    category_by_name = Connectives_dict().conn_category
    connective = constituent.connective

    # Vocabularies used to turn feature strings into feature indices.
    str_vocab = NT_dict().dict_CON_Str
    lstr_vocab = NT_dict().dict_CON_LStr
    ctx_vocab = NT_dict().dict_NT_Ctx
    path_vocab = NT_dict().dict_CON_NT_Path
    path_lsib_vocab = NT_dict().dict_CON_NT_Path_iLsib

    # Raw feature values.
    token_indices = connective.token_indices
    doc_id = connective.DocID
    sentence_no = connective.sent_index

    node = dict_util.get_conn_node(tree, token_indices)
    surface = dict_util.get_CON_Str(
        parse_dict, doc_id, sentence_no, token_indices)
    surface_lower = surface.lower()
    category = category_by_name[connective.name]
    left_sib = dict_util.get_CON_iLSib(tree, node)
    right_sib = dict_util.get_CON_iRSib(tree, node)
    nt_context = dict_util.get_NT_Ctx(constituent)
    nt_path = dict_util.get_CON_NT_Path(node, constituent)
    nt_position = dict_util.get_CON_NT_Position(node, constituent)

    # The path is suffixed with a two-way bucket of the left-sibling value.
    nt_path_lsib = nt_path + (":>1" if left_sib > 1 else ":<=1")

    indexed = [
        (str_vocab, surface),
        (lstr_vocab, surface_lower),
        (ctx_vocab, nt_context),
        (path_vocab, nt_path),
        (path_lsib_vocab, nt_path_lsib),
        # category
        ({"subordinator": 1, "coordinator": 2, "adverbial": 3}, category),
    ]
    features = [get_feature({}, vocab, value) for vocab, value in indexed]

    # numeric sibling features
    features.append(Feature("", 1, {1: left_sib}))
    features.append(Feature("", 1, {1: right_sib}))

    # position
    features.append(get_feature({}, {"right": 1, "left": 2}, nt_position))

    return util.mergeFeatures(features)
def all_features(parse_dict, connective):
    """Assemble the merged feature vector for one explicit connective.

    Feature groups, in output order: connective string / POS / previous
    token, syntactic categories ("Pitler"), connective-category pairs,
    category-category pairs, and five extra context features looked up with
    ``get_feature_by_feat``.

    ``connective`` must expose ``DocID``, ``sent_index`` and
    ``token_indices``.  Returns the result of ``util.mergeFeatures``.
    """

    def bar(left, right):
        # All pair features share the same "left|right" encoding.
        return "%s|%s" % (left, right)

    # --- vocabularies -----------------------------------------------------
    vocab_cstring = Explicit_dict().dict_CString
    vocab_cpos = Explicit_dict().dict_CPOS
    vocab_c_prev = Explicit_dict().dict_C_Prev
    vocab_clstring = Explicit_dict().dict_CLString
    # Pitler
    vocab_self = Explicit_dict().self_category_dict
    vocab_parent = Explicit_dict().parent_category_dict
    vocab_left = Explicit_dict().left_sibling_category_dict
    vocab_right = Explicit_dict().right_sibling_category_dict
    # conn-syn
    vocab_conn_self = Explicit_dict().conn_self_category_dict
    vocab_conn_parent = Explicit_dict().conn_parent_category_dict
    vocab_conn_left = Explicit_dict().conn_left_sibling_category_dict
    vocab_conn_right = Explicit_dict().conn_right_sibling_category_dict
    # syn-syn
    vocab_self_parent = Explicit_dict().self_parent_dict
    vocab_self_right = Explicit_dict().self_right_dict
    vocab_self_left = Explicit_dict().self_left_dict
    vocab_parent_left = Explicit_dict().parent_left_dict
    vocab_parent_right = Explicit_dict().parent_right_dict
    vocab_left_right = Explicit_dict().left_right_dict
    # mine
    vocab_parent_ctx = Explicit_dict().dict_conn_parent_category_ctx
    vocab_as_prev = Explicit_dict().dict_as_prev_conn
    vocab_as_prev_pos = Explicit_dict().dict_as_prev_connPOS
    vocab_when_prev = Explicit_dict().dict_when_prev_conn
    vocab_when_prev_pos = Explicit_dict().dict_when_prev_connPOS

    # --- raw feature values -----------------------------------------------
    doc_id = connective.DocID
    sentence_no = connective.sent_index
    token_indices = connective.token_indices
    where = (parse_dict, doc_id, sentence_no, token_indices)

    c_string = dict_util.get_C_String(*where)
    c_pos = dict_util.get_CPOS(*where)
    prev_token = dict_util.get_prev1(*where)
    c_prev = bar(c_string, prev_token)
    c_lower = c_string.lower()

    sentence = parse_dict[doc_id]["sentences"][sentence_no]
    tree = Syntax_tree(sentence["parsetree"].strip())

    self_cat = dict_util.get_self_category(tree, token_indices)
    parent_cat = dict_util.get_parent_category(tree, token_indices)
    left_cat = dict_util.get_left_sibling_category(tree, token_indices)
    right_cat = dict_util.get_right_sibling_category(tree, token_indices)

    parent_ctx = dict_util.get_conn_parent_category_Ctx(*where)
    as_prev = dict_util.get_as_prev_conn(*where)
    as_prev_pos = dict_util.get_as_prev_connPOS(*where)
    when_prev = dict_util.get_when_prev_conn(*where)
    when_prev_pos = dict_util.get_when_prev_connPOS(*where)

    # --- feature assembly (order is significant for the merge) ------------
    indexed = [
        (vocab_cstring, c_string),
        (vocab_cpos, c_pos),
        (vocab_c_prev, c_prev),
        (vocab_clstring, c_lower),
        # Pitler
        (vocab_self, self_cat),
        (vocab_parent, parent_cat),
        (vocab_left, left_cat),
        (vocab_right, right_cat),
        # conn-syn: pairs use the lower-cased connective string
        (vocab_conn_self, bar(c_lower, self_cat)),
        (vocab_conn_parent, bar(c_lower, parent_cat)),
        (vocab_conn_left, bar(c_lower, left_cat)),
        (vocab_conn_right, bar(c_lower, right_cat)),
        # syn-syn
        (vocab_self_parent, bar(self_cat, parent_cat)),
        (vocab_self_right, bar(self_cat, right_cat)),
        (vocab_self_left, bar(self_cat, left_cat)),
        (vocab_parent_left, bar(parent_cat, left_cat)),
        (vocab_parent_right, bar(parent_cat, right_cat)),
        (vocab_left_right, bar(left_cat, right_cat)),
    ]
    features = [get_feature({}, vocab, value) for vocab, value in indexed]

    # mine
    extra = [
        (vocab_parent_ctx, parent_ctx),
        (vocab_as_prev, as_prev),
        (vocab_as_prev_pos, as_prev_pos),
        (vocab_when_prev, when_prev),
        (vocab_when_prev_pos, when_prev_pos),
    ]
    features.extend(
        [get_feature_by_feat(vocab, value) for vocab, value in extra])

    return util.mergeFeatures(features)
def _all_features(arg_clauses, clause_index, parse_dict):
    """Assemble the merged feature vector for one candidate clause.

    Every raw value is obtained from a ``dict_util`` helper called with
    ``(arg_clauses, clause_index, parse_dict)`` and then indexed through the
    matching ``Ps_arg2_dict`` vocabulary (or a small literal table for the
    clause position and connective category).

    Returns the result of ``util.mergeFeatures`` over the features, in a
    fixed order.
    """
    clause = (arg_clauses, clause_index, parse_dict)

    # --- vocabularies -----------------------------------------------------
    vocab_verbs_lower = Ps_arg2_dict().dict_lowercase_verbs
    vocab_verbs_lemma = Ps_arg2_dict().dict_lemma_verbs
    vocab_curr_first = Ps_arg2_dict().dict_curr_first
    vocab_curr_last = Ps_arg2_dict().dict_curr_last
    vocab_prev_last = Ps_arg2_dict().dict_prev_last
    vocab_next_first = Ps_arg2_dict().dict_next_first
    vocab_prev_curr = Ps_arg2_dict().dict_prev_last_curr_first
    vocab_curr_next = Ps_arg2_dict().dict_curr_last_next_first
    vocab_rules = Ps_arg2_dict().dict_curr_production_rule
    position_ids = {"left": 1, "middle": 2, "right": 3}
    # mine
    vocab_con_str = Ps_arg2_dict().dict_con_str
    vocab_con_lstr = Ps_arg2_dict().dict_con_lstr
    category_ids = {"subordinator": 1, "coordinator": 2, "adverbial": 3}
    vocab_root_path = Ps_arg2_dict().dict_conn_to_root_path
    vocab_root_path_short = Ps_arg2_dict().dict_conn_to_root_compressed_path
    vocab_conn_position = Ps_arg2_dict().dict_conn_position

    # --- raw feature values -----------------------------------------------
    verbs_lower = dict_util.get_curr_lowercased_verbs(*clause)
    verbs_lemma = dict_util.get_curr_lemma_verbs(*clause)
    curr_first = dict_util.get_curr_first(*clause)
    curr_last = dict_util.get_curr_last(*clause)
    prev_last = dict_util.get_prev_last(*clause)
    next_first = dict_util.get_next_first(*clause)
    prev_curr = "%s_%s" % (prev_last, curr_first)
    curr_next = "%s_%s" % (curr_last, next_first)
    # the position of the current clause
    position = dict_util.get_curr_position(*clause)
    rules = dict_util.get_curr_production_rule(*clause)
    # mine
    con_str = dict_util.get_con_str(*clause)
    con_lstr = dict_util.get_con_lstr(*clause)
    con_cat = dict_util.get_con_cat(*clause)
    root_path = dict_util.get_conn_to_root_path(*clause)
    root_path_short = dict_util.get_conn_to_root_compressed_path(*clause)
    conn_position = dict_util.get_conn_position(*clause)

    # --- feature assembly (order is significant for the merge) ------------
    features = [
        get_feature_by_feat_list(vocab_verbs_lower, verbs_lower),
        get_feature_by_feat_list(vocab_verbs_lemma, verbs_lemma),
    ]
    lexical = [
        (vocab_curr_first, curr_first),
        (vocab_curr_last, curr_last),
        (vocab_prev_last, prev_last),
        (vocab_next_first, next_first),
        (vocab_prev_curr, prev_curr),
        (vocab_curr_next, curr_next),
        (position_ids, position),
    ]
    features.extend(
        [get_feature_by_feat(vocab, value) for vocab, value in lexical])

    # production rules
    features.append(get_feature_by_feat_list(vocab_rules, rules))

    # mine
    connective = [
        (vocab_con_str, con_str),
        (vocab_con_lstr, con_lstr),
        (category_ids, con_cat),
        (vocab_root_path, root_path),
        (vocab_root_path_short, root_path_short),
        (vocab_conn_position, conn_position),
    ]
    features.extend(
        [get_feature_by_feat(vocab, value) for vocab, value in connective])

    return util.mergeFeatures(features)
def all_features(parse_dict, DocID, sent_index, conn_indices):
    """Build the merged feature vector for one connective candidate.

    Feature groups, in output order:

    * "Z.Lin": connective POS, previous/next token and POS (alone and paired
      with the connective), and the path from each connective token's parent
      to the root (full and compressed).
    * "Pitler": self / parent / left-sibling / right-sibling categories and
      a flag telling whether the right sibling subtree holds a "VP" or "S".
    * conn-syn and syn-syn: pairwise combinations of the above.
    * "mine": lower-cased and raw connective string, the node names on the
      parent-to-root paths, and the connective paired with the linked
      context of its right sibling and of its parent category.

    Args:
        parse_dict: Parsed corpus; read as
            ``parse_dict[DocID]["sentences"][k]["words"][j]`` (a pair of
            token string and a dict holding ``"PartOfSpeech"``) and
            ``["parsetree"]``.
        DocID: Key of the document inside ``parse_dict``.
        sent_index: Index of the sentence that contains the connective.
        conn_indices: Non-empty sequence of token indices of the connective
            inside that sentence; the first and last entries are used to
            locate the neighbouring tokens.

    Returns:
        The result of ``util.mergeFeatures`` over the individual features.

    Raises:
        IndexError: If ``conn_indices`` is empty.
    """

    def bar(left, right):
        # All pair features share the same "left|right" encoding.
        return "%s|%s" % (left, right)

    sentences = parse_dict[DocID]["sentences"]
    words = sentences[sent_index]["words"]
    # One vocabulary holder is enough; every table is read from it below.
    vocab = Connectives_dict()

    # --- connective POS and string ----------------------------------------
    CPOS = "_".join([words[idx][1]["PartOfSpeech"] for idx in conn_indices])
    conn_name = " ".join([words[idx][0] for idx in conn_indices])

    # --- previous token ---------------------------------------------------
    prev_index = conn_indices[0] - 1
    prev_sent_index = sent_index
    has_prev = True
    if prev_index < 0:
        # Connective opens its sentence: use the last token of the previous
        # sentence, if there is one.
        prev_index = -1
        prev_sent_index -= 1
        has_prev = prev_sent_index >= 0
    # Branch on whether a neighbour exists rather than on the token text, so
    # a real token spelled "NONE" still gets its own POS tag.
    if has_prev:
        prev_word = sentences[prev_sent_index]["words"][prev_index]
        prev_token = prev_word[0]
        prevPOS = prev_word[1]["PartOfSpeech"]
    else:
        prev_token = prevPOS = "NONE"

    # --- next token -------------------------------------------------------
    next_index = conn_indices[-1] + 1
    next_sent_index = sent_index
    has_next = True
    if next_index >= len(words):
        # Connective closes its sentence: use the first token of the next
        # sentence, if there is one.
        next_index = 0
        next_sent_index += 1
        has_next = next_sent_index < len(sentences)
    if has_next:
        next_word = sentences[next_sent_index]["words"][next_index]
        next_token = next_word[0]
        nextPOS = next_word[1]["PartOfSpeech"]
    else:
        next_token = nextPOS = "NONE"

    # --- syntactic features -----------------------------------------------
    syntax_tree = Syntax_tree(sentences[sent_index]["parsetree"].strip())
    right_sibling_node = None

    if syntax_tree.tree is None:
        # No usable parse: every tree-derived value takes the same marker.
        cparent_to_root_path = compressed_path = "NONE_TREE"
        path_node_names = ["NONE_TREE"]
        self_category = parent_category = "NONE_TREE"
        left_sibling_category = right_sibling_category = "NONE_TREE"
        rightSiblingCtx = parent_categoryCtx = "NONE_TREE"
    else:
        # Path from the parent of each connective token up to the root;
        # computed once and reused for the three path-based features.
        parent_paths = [
            syntax_tree.get_node_path_to_root(
                syntax_tree.get_leaf_node_by_token_index(idx).up)
            for idx in conn_indices
        ]
        cparent_to_root_path = "&".join(parent_paths)
        compressed_path = "&".join(
            [util.get_compressed_path(path) for path in parent_paths])
        path_node_names = "-->".join(parent_paths).split("-->")

        self_category = syntax_tree.get_self_category_node_by_token_indices(
            conn_indices).name
        parent_node = syntax_tree.get_parent_category_node_by_token_indices(
            conn_indices)
        left_sibling_node = (
            syntax_tree.get_left_sibling_category_node_by_token_indices(
                conn_indices))
        right_sibling_node = (
            syntax_tree.get_right_sibling_category_node_by_token_indices(
                conn_indices))

        parent_category = (
            "ROOT" if parent_node is None else parent_node.name)
        left_sibling_category = (
            "NONE" if left_sibling_node is None else left_sibling_node.name)
        right_sibling_category = (
            "NONE" if right_sibling_node is None else right_sibling_node.name)

        rightSiblingCtx = dict_util.get_node_linked_Ctx(
            right_sibling_node, syntax_tree)
        parent_categoryCtx = dict_util.get_node_linked_Ctx(
            parent_node, syntax_tree)

    # Flag: does the right sibling (itself or any descendant) carry the
    # label "VP" or "S"?
    right_sibling_contains_VP = {}
    if right_sibling_node is not None:
        subtree = [right_sibling_node]
        subtree.extend(right_sibling_node.get_descendants())
        if any(node.name in ("VP", "S") for node in subtree):
            right_sibling_contains_VP[1] = 1

    # --- feature assembly (order is significant for the merge) ------------
    leading = [
        # Z.Lin
        (vocab.cpos_dict, CPOS),
        (vocab.prev_C_dict, bar(prev_token, conn_name)),
        (vocab.prevPOS_dict, prevPOS),
        (vocab.prevPOS_CPOS_dict, bar(prevPOS, CPOS)),
        (vocab.C_next_dict, bar(conn_name, next_token)),
        (vocab.nextPOS_dict, nextPOS),
        (vocab.CPOS_nextPOS_dict, bar(CPOS, nextPOS)),
        (vocab.CParent_to_root_path_dict, cparent_to_root_path),
        (vocab.compressed_CParent_to_root_path_dict, compressed_path),
        # Pitler
        (vocab.self_category_dict, self_category),
        (vocab.parent_category_dict, parent_category),
        (vocab.left_sibling_category_dict, left_sibling_category),
        (vocab.right_sibling_category_dict, right_sibling_category),
    ]
    trailing = [
        # conn-syn
        (vocab.conn_self_category_dict, bar(conn_name, self_category)),
        (vocab.conn_parent_category_dict, bar(conn_name, parent_category)),
        (vocab.conn_left_sibling_category_dict,
         bar(conn_name, left_sibling_category)),
        (vocab.conn_right_sibling_category_dict,
         bar(conn_name, right_sibling_category)),
        # syn-syn
        (vocab.self_parent_dict, bar(self_category, parent_category)),
        (vocab.self_right_dict, bar(self_category, right_sibling_category)),
        (vocab.self_left_dict, bar(self_category, left_sibling_category)),
        (vocab.parent_left_dict,
         bar(parent_category, left_sibling_category)),
        (vocab.parent_right_dict,
         bar(parent_category, right_sibling_category)),
        (vocab.left_right_dict,
         bar(left_sibling_category, right_sibling_category)),
    ]

    features = [get_feature({}, table, value) for table, value in leading]
    features.append(Feature("", 1, right_sibling_contains_VP))
    features.extend(
        [get_feature({}, table, value) for table, value in trailing])

    # mine
    features.append(
        get_feature_by_feat(vocab.dict_conn_lower_case, conn_name.lower()))
    features.append(get_feature_by_feat(vocab.dict_conn, conn_name))
    features.append(
        get_feature_by_feat_list(
            vocab.dict_CParent_to_root_path_node_names, path_node_names))
    features.append(
        get_feature_by_feat(
            vocab.dict_conn_rightSiblingCtx,
            bar(conn_name, rightSiblingCtx)))
    features.append(
        get_feature_by_feat(
            vocab.dict_conn_parent_category_Ctx,
            bar(conn_name, parent_categoryCtx)))

    return util.mergeFeatures(features)
def _all_features(arg_clauses, clause_index, parse_dict):
    """Assemble the merged feature vector for one candidate clause.

    Every raw value is obtained from a ``dict_util`` helper called with
    ``(arg_clauses, clause_index, parse_dict)`` and then indexed through the
    matching ``Ps_arg2_dict`` vocabulary (or a small literal table for the
    clause position and connective category).

    Returns the result of ``util.mergeFeatures`` over the features, in a
    fixed order.
    """
    clause = (arg_clauses, clause_index, parse_dict)

    # --- vocabularies -----------------------------------------------------
    vocab_verbs_lower = Ps_arg2_dict().dict_lowercase_verbs
    vocab_verbs_lemma = Ps_arg2_dict().dict_lemma_verbs
    vocab_curr_first = Ps_arg2_dict().dict_curr_first
    vocab_curr_last = Ps_arg2_dict().dict_curr_last
    vocab_prev_last = Ps_arg2_dict().dict_prev_last
    vocab_next_first = Ps_arg2_dict().dict_next_first
    vocab_prev_curr = Ps_arg2_dict().dict_prev_last_curr_first
    vocab_curr_next = Ps_arg2_dict().dict_curr_last_next_first
    vocab_rules = Ps_arg2_dict().dict_curr_production_rule
    position_ids = {"left": 1, "middle": 2, "right": 3}
    # mine
    vocab_con_str = Ps_arg2_dict().dict_con_str
    vocab_con_lstr = Ps_arg2_dict().dict_con_lstr
    category_ids = {"subordinator": 1, "coordinator": 2, "adverbial": 3}
    vocab_root_path = Ps_arg2_dict().dict_conn_to_root_path
    vocab_root_path_short = Ps_arg2_dict().dict_conn_to_root_compressed_path
    vocab_conn_position = Ps_arg2_dict().dict_conn_position

    # --- raw feature values -----------------------------------------------
    verbs_lower = dict_util.get_curr_lowercased_verbs(*clause)
    verbs_lemma = dict_util.get_curr_lemma_verbs(*clause)
    curr_first = dict_util.get_curr_first(*clause)
    curr_last = dict_util.get_curr_last(*clause)
    prev_last = dict_util.get_prev_last(*clause)
    next_first = dict_util.get_next_first(*clause)
    prev_curr = "%s_%s" % (prev_last, curr_first)
    curr_next = "%s_%s" % (curr_last, next_first)
    # the position of the current clause
    position = dict_util.get_curr_position(*clause)
    rules = dict_util.get_curr_production_rule(*clause)
    # mine
    con_str = dict_util.get_con_str(*clause)
    con_lstr = dict_util.get_con_lstr(*clause)
    con_cat = dict_util.get_con_cat(*clause)
    root_path = dict_util.get_conn_to_root_path(*clause)
    root_path_short = dict_util.get_conn_to_root_compressed_path(*clause)
    conn_position = dict_util.get_conn_position(*clause)

    # --- feature assembly (order is significant for the merge) ------------
    features = [
        get_feature_by_feat_list(vocab_verbs_lower, verbs_lower),
        get_feature_by_feat_list(vocab_verbs_lemma, verbs_lemma),
    ]
    lexical = [
        (vocab_curr_first, curr_first),
        (vocab_curr_last, curr_last),
        (vocab_prev_last, prev_last),
        (vocab_next_first, next_first),
        (vocab_prev_curr, prev_curr),
        (vocab_curr_next, curr_next),
        (position_ids, position),
    ]
    features.extend(
        [get_feature_by_feat(vocab, value) for vocab, value in lexical])

    # production rules
    features.append(get_feature_by_feat_list(vocab_rules, rules))

    # mine
    connective = [
        (vocab_con_str, con_str),
        (vocab_con_lstr, con_lstr),
        (category_ids, con_cat),
        (vocab_root_path, root_path),
        (vocab_root_path_short, root_path_short),
        (vocab_conn_position, conn_position),
    ]
    features.extend(
        [get_feature_by_feat(vocab, value) for vocab, value in connective])

    return util.mergeFeatures(features)