Exemplo n.º 1
0
def _all_features(parse_dict, constituent, i, constituents):
    """Run every constituent-level feature extractor and merge the results.

    Each extractor is called as ``extractor(parse_dict, constituent, i,
    constituents)``.  The individual results are handed, in the order the
    extractors are listed, to ``util.mergeFeatures`` and the merged feature
    is returned.
    """
    # The order of this tuple is the order in which features are merged.
    extractors = (
        CON_POS,
        NT_prev_curr_Path,
        CParent_to_root_path,
        self_category,
        CParent_to_root_path_node_names,
        left_sibling_category,
        NT_to_root_path,
        conn_parent_categoryCtx,
        parent_category,
        conn_rightSiblingCtx,
        CON_Str,
        CON_LStr,
        CON_Cat,
        CON_iRSib,
        NT_Ctx,
        CON_NT_Path,
        CON_NT_Path_iLsib,
    )

    extracted = []
    for extractor in extractors:
        extracted.append(extractor(parse_dict, constituent, i, constituents))

    return util.mergeFeatures(extracted)
def conn_syn(parse_dict, DocID, sent_index, conn_indices):
    """Build the connective/syntax conjunction features for one connective.

    The lower-cased connective string is combined, as
    ``"<connective>|<category>"``, with the self, parent, left-sibling and
    right-sibling categories that ``dict_util`` reports for the connective in
    the sentence's parse tree.  Each combined key is looked up in the
    matching ``Explicit_dict`` table through ``get_feature_by_feat`` and the
    four resulting features are merged in that order.
    """
    # Lookup tables, listed in the same order as the categories below.
    tables = [
        Explicit_dict().conn_self_category_dict,
        Explicit_dict().conn_parent_category_dict,
        Explicit_dict().conn_left_sibling_category_dict,
        Explicit_dict().conn_right_sibling_category_dict,
    ]

    connective = dict_util.get_C_String(
        parse_dict, DocID, sent_index, conn_indices
    ).lower()

    sentence = parse_dict[DocID]["sentences"][sent_index]
    tree = Syntax_tree(sentence["parsetree"].strip())

    categories = [
        dict_util.get_self_category(tree, conn_indices),
        dict_util.get_parent_category(tree, conn_indices),
        dict_util.get_left_sibling_category(tree, conn_indices),
        dict_util.get_right_sibling_category(tree, conn_indices),
    ]

    # Conjoin the connective with each syntactic category.
    keys = ["%s|%s" % (connective, category) for category in categories]

    return util.mergeFeatures(
        [get_feature_by_feat(table, key) for table, key in zip(tables, keys)]
    )
def _all_features(arg_clauses, clause_index, parse_dict):
    """Assemble the merged feature for clause ``clause_index`` of ``arg_clauses``.

    Values computed by ``dict_util`` for the clause (verbs, boundary words,
    position, production rules, ...) are looked up in the tables exposed by
    ``Implicit_arg1_dict`` and merged with ``util.mergeFeatures``.  The
    number of words in the clause is added as a single numeric feature.
    """
    # Lookup tables.
    tbl_lowercase_verbs = Implicit_arg1_dict().dict_lowercase_verbs
    tbl_lemma_verbs = Implicit_arg1_dict().dict_lemma_verbs
    tbl_curr_first = Implicit_arg1_dict().dict_curr_first
    tbl_curr_last = Implicit_arg1_dict().dict_curr_last
    tbl_prev_last = Implicit_arg1_dict().dict_prev_last
    tbl_next_first = Implicit_arg1_dict().dict_next_first
    tbl_prev_last_curr_first = Implicit_arg1_dict().dict_prev_last_curr_first
    tbl_curr_last_next_first = Implicit_arg1_dict().dict_curr_last_next_first
    position_ids = {"left": 1, "middle": 2, "right": 3}
    tbl_production_rules = Implicit_arg1_dict().dict_prev_curr_CP_production_rule
    tbl_prev2_pos_lemma_verb = Implicit_arg1_dict().dict_prev2_pos_lemma_verb

    # Every dict_util helper below takes the same three arguments.
    clause_args = (arg_clauses, clause_index, parse_dict)

    verbs_lowercased = dict_util.get_curr_lowercased_verbs(*clause_args)
    verbs_lemmatised = dict_util.get_curr_lemma_verbs(*clause_args)

    first_of_curr = dict_util.get_curr_first(*clause_args)
    last_of_curr = dict_util.get_curr_last(*clause_args)
    last_of_prev = dict_util.get_prev_last(*clause_args)
    first_of_next = dict_util.get_next_first(*clause_args)
    prev_curr_boundary = "%s_%s" % (last_of_prev, first_of_curr)
    curr_next_boundary = "%s_%s" % (last_of_curr, first_of_next)

    # The number of words in the current clause.
    word_count = len(arg_clauses.clauses[clause_index][0])
    # The position of the current clause.
    clause_position = dict_util.get_curr_position(*clause_args)

    production_rules = dict_util.get_prev_curr_CP_production_rule(*clause_args)
    prev2_verb = dict_util.get_2prev_pos_lemma_verb(*clause_args)

    # The order of this list is the order in which features are merged.
    return util.mergeFeatures([
        get_feature_by_feat_list(tbl_lowercase_verbs, verbs_lowercased),
        get_feature_by_feat_list(tbl_lemma_verbs, verbs_lemmatised),
        get_feature_by_feat(tbl_curr_first, first_of_curr),
        get_feature_by_feat(tbl_curr_last, last_of_curr),
        get_feature_by_feat(tbl_prev_last, last_of_prev),
        get_feature_by_feat(tbl_next_first, first_of_next),
        get_feature_by_feat(tbl_prev_last_curr_first, prev_curr_boundary),
        get_feature_by_feat(tbl_curr_last_next_first, curr_next_boundary),
        get_feature_by_feat(position_ids, clause_position),
        Feature("", 1, {"1": word_count}),
        get_feature_by_feat(tbl_prev2_pos_lemma_verb, prev2_verb),
        # production rules
        get_feature_by_feat_list(tbl_production_rules, production_rules),
    ])
Exemplo n.º 4
0
def verbs(relation, parse_dict):
    """Build the verb features for the Arg1/Arg2 pair of ``relation``.

    Two features are merged, in this order:

    1. A single numeric feature counting the (Arg1 word, Arg2 word) pairs
       whose lower-cased forms are both keys of ``dict_verb_classes`` and
       whose ``"#"``-separated class strings share at least one class.
       Repeated words count once per occurrence.
    2. The feature ``get_feature_by_list`` builds from the Arg1 main-verb
       POS list followed by the Arg2 main-verb POS list.
    """
    # load dict
    dict_verb_classes = Non_Explicit_dict().dict_verb_classes

    # 1. the number of pairs of verbs in Arg1 and Arg2 from same verb class
    Arg1_words = dict_util.get_Arg_Words_List(relation, "Arg1", parse_dict)
    Arg2_words = dict_util.get_Arg_Words_List(relation, "Arg2", parse_dict)

    def class_sets(words):
        # One class set per word occurrence that has a verb-class entry.
        # Done once per word rather than once per pair, so the pair loop
        # below is only a set-overlap check.
        lowered = (word.lower() for word in words)
        return [
            set(dict_verb_classes[word].split("#"))
            for word in lowered
            if word in dict_verb_classes
        ]

    arg1_class_sets = class_sets(Arg1_words)
    arg2_class_sets = class_sets(Arg2_words)

    # Count the overlapping pairs lazily; no cross-product list is built.
    count = sum(
        1
        for classes_1 in arg1_class_sets
        for classes_2 in arg2_class_sets
        if not classes_1.isdisjoint(classes_2)
    )
    feat_1 = Feature("", 1, {1: count})

    # 2. POS of main verb
    Arg1_MV_POS = dict_util.get_main_verb_pos(relation, "Arg1", parse_dict)
    Arg2_MV_POS = dict_util.get_main_verb_pos(relation, "Arg2", parse_dict)
    MV_POS_feature = get_feature_by_list(Arg1_MV_POS + Arg2_MV_POS)

    return util.mergeFeatures([feat_1, MV_POS_feature])
Exemplo n.º 5
0
def all_features(arg_clauses, clause_index, parse_dict):
    """Merge the features of the currently enabled clause extractors.

    Each extractor is called as ``extractor(arg_clauses, clause_index,
    parse_dict)`` and the results are merged, in listing order, with
    ``util.mergeFeatures``.
    """
    # Extractors that were commented out of this list in the original code:
    # lowercase_verbs, prev_last, next_first, curr_last_next_first,
    # production_rule_list, position, con_str, conn_to_root_path,
    # conn_to_root_compressed_path, conn_curr_position.
    active_extractors = (
        lemma_verbs,
        curr_first,
        curr_last,
        prev_last_curr_first,
        con_lstr,
        con_cat,
    )

    per_extractor = []
    for extract in active_extractors:
        per_extractor.append(extract(arg_clauses, clause_index, parse_dict))

    # merge features
    return util.mergeFeatures(per_extractor)
def syn_syn(parse_dict, DocID, sent_index, conn_indices):
    """Build the syntax/syntax conjunction features for one connective.

    The self, parent, left-sibling and right-sibling categories reported by
    ``dict_util`` for the connective are paired up as ``"<a>|<b>"`` keys
    (self|parent, self|right, self|left, parent|left, parent|right,
    left|right).  Each key is looked up in the matching ``Explicit_dict``
    table via ``get_feature_by_feat`` and the six features are merged in
    that order.
    """
    # Lookup tables, listed in the same order as the category pairs below.
    tables = [
        Explicit_dict().self_parent_dict,
        Explicit_dict().self_right_dict,
        Explicit_dict().self_left_dict,
        Explicit_dict().parent_left_dict,
        Explicit_dict().parent_right_dict,
        Explicit_dict().left_right_dict,
    ]

    sentence = parse_dict[DocID]["sentences"][sent_index]
    tree = Syntax_tree(sentence["parsetree"].strip())

    own = dict_util.get_self_category(tree, conn_indices)
    parent = dict_util.get_parent_category(tree, conn_indices)
    left = dict_util.get_left_sibling_category(tree, conn_indices)
    right = dict_util.get_right_sibling_category(tree, conn_indices)

    category_pairs = [
        (own, parent),
        (own, right),
        (own, left),
        (parent, left),
        (parent, right),
        (left, right),
    ]
    keys = ["%s|%s" % pair for pair in category_pairs]

    return util.mergeFeatures(
        [get_feature_by_feat(table, key) for table, key in zip(tables, keys)]
    )
def _all_features(parse_dict, constituent, i, constituents):
    """Compute and merge all constituent features.

    Every function in the extractor list is invoked with ``(parse_dict,
    constituent, i, constituents)``; the list of results, kept in listing
    order, is merged by ``util.mergeFeatures``.
    """
    extractors = [
        CON_POS,
        NT_prev_curr_Path,
        CParent_to_root_path,
        self_category,
        CParent_to_root_path_node_names,
        left_sibling_category,
        NT_to_root_path,
        conn_parent_categoryCtx,
        parent_category,
        conn_rightSiblingCtx,
        CON_Str,
        CON_LStr,
        CON_Cat,
        CON_iRSib,
        NT_Ctx,
        CON_NT_Path,
        CON_NT_Path_iLsib,
    ]

    # All extractors share one call signature.
    call_args = (parse_dict, constituent, i, constituents)
    results = []
    for extract in extractors:
        results.append(extract(*call_args))

    # merge features
    return util.mergeFeatures(results)
Exemplo n.º 8
0
def firstlast_first3(relation, parse_dict):
    """Build the first/last/first-3 features of a relation.

    ``dict_util.get_firstlast_first3`` supplies eight values (Arg1 first,
    Arg1 last, Arg2 first, Arg2 last, Arg1-first/Arg2-first, Arg1-last/
    Arg2-last, Arg1 first3, Arg2 first3).  Each is looked up in the
    corresponding ``Non_Explicit_dict`` table with ``get_feature_by_feat``
    and the eight features are merged in that order.
    """
    # Lookup tables, in the same order as the values unpacked below.
    tables = [
        Non_Explicit_dict().dict_Arg1_first,
        Non_Explicit_dict().dict_Arg1_last,
        Non_Explicit_dict().dict_Arg2_first,
        Non_Explicit_dict().dict_Arg2_last,
        Non_Explicit_dict().dict_Arg1_first_Arg2_first,
        Non_Explicit_dict().dict_Arg1_last_Arg2_last,
        Non_Explicit_dict().dict_Arg1_first3,
        Non_Explicit_dict().dict_Arg2_first3,
    ]

    # Explicit eight-way unpacking: a result of any other length raises here.
    (
        arg1_first, arg1_last, arg2_first, arg2_last,
        first_pair, last_pair,
        arg1_first3, arg2_first3,
    ) = dict_util.get_firstlast_first3(relation, parse_dict)

    values = [
        arg1_first, arg1_last, arg2_first, arg2_last,
        first_pair, last_pair,
        arg1_first3, arg2_first3,
    ]

    return util.mergeFeatures(
        [get_feature_by_feat(table, value) for table, value in zip(tables, values)]
    )
Exemplo n.º 9
0
def _all_features(arg_clauses, clause_index, parse_dict):
    """Assemble the merged feature for clause ``clause_index`` of ``arg_clauses``.

    Values computed by ``dict_util`` for the clause (verbs, boundary words,
    position, production rules, ...) are looked up in the tables exposed by
    ``Implicit_arg2_dict`` and merged with ``util.mergeFeatures``.  The
    number of words in the clause is added as a single numeric feature.
    """
    # Lookup tables.
    tbl_lowercase_verbs = Implicit_arg2_dict().dict_lowercase_verbs
    tbl_lemma_verbs = Implicit_arg2_dict().dict_lemma_verbs
    tbl_curr_first = Implicit_arg2_dict().dict_curr_first
    tbl_curr_last = Implicit_arg2_dict().dict_curr_last
    tbl_prev_last = Implicit_arg2_dict().dict_prev_last
    tbl_next_first = Implicit_arg2_dict().dict_next_first
    tbl_prev_last_curr_first = Implicit_arg2_dict().dict_prev_last_curr_first
    tbl_curr_last_next_first = Implicit_arg2_dict().dict_curr_last_next_first
    position_ids = {"left": 1, "middle": 2, "right": 3}
    tbl_production_rules = Implicit_arg2_dict().dict_prev_curr_CP_production_rule
    tbl_prev2_pos_lemma_verb = Implicit_arg2_dict().dict_prev2_pos_lemma_verb

    # Every dict_util helper below takes the same three arguments.
    clause_args = (arg_clauses, clause_index, parse_dict)

    verbs_lowercased = dict_util.get_curr_lowercased_verbs(*clause_args)
    verbs_lemmatised = dict_util.get_curr_lemma_verbs(*clause_args)

    first_of_curr = dict_util.get_curr_first(*clause_args)
    last_of_curr = dict_util.get_curr_last(*clause_args)
    last_of_prev = dict_util.get_prev_last(*clause_args)
    first_of_next = dict_util.get_next_first(*clause_args)
    prev_curr_boundary = "%s_%s" % (last_of_prev, first_of_curr)
    curr_next_boundary = "%s_%s" % (last_of_curr, first_of_next)

    # The number of words in the current clause.
    word_count = len(arg_clauses.clauses[clause_index][0])
    # The position of the current clause.
    clause_position = dict_util.get_curr_position(*clause_args)
    production_rules = dict_util.get_prev_curr_CP_production_rule(*clause_args)

    prev2_verb = dict_util.get_2prev_pos_lemma_verb(*clause_args)

    # The order of this list is the order in which features are merged.
    return util.mergeFeatures([
        get_feature_by_feat_list(tbl_lowercase_verbs, verbs_lowercased),
        get_feature_by_feat_list(tbl_lemma_verbs, verbs_lemmatised),
        get_feature_by_feat(tbl_curr_first, first_of_curr),
        get_feature_by_feat(tbl_curr_last, last_of_curr),
        get_feature_by_feat(tbl_prev_last, last_of_prev),
        get_feature_by_feat(tbl_next_first, first_of_next),
        get_feature_by_feat(tbl_prev_last_curr_first, prev_curr_boundary),
        get_feature_by_feat(tbl_curr_last_next_first, curr_next_boundary),
        get_feature_by_feat(position_ids, clause_position),
        Feature("", 1, {"1": word_count}),
        get_feature_by_feat(tbl_prev2_pos_lemma_verb, prev2_verb),
        # production rules
        get_feature_by_feat_list(tbl_production_rules, production_rules),
    ])
Exemplo n.º 10
0
def all_features(parse_dict, constituent, i, constituents):
    """Build the merged feature for one constituent and its connective.

    The connective string (original and lower-cased), the constituent
    context, the connective-to-constituent path and that path tagged with
    ``":>1"`` / ``":<=1"`` (depending on whether ``CON_iLSib`` exceeds 1) are
    looked up in the ``NT_dict`` tables via ``get_feature``.  The connective
    category, the two sibling values and the connective/constituent position
    are appended afterwards, and everything is merged with
    ``util.mergeFeatures``.  ``i`` and ``constituents`` are not used here.
    """
    tree = constituent.syntax_tree
    category_of = Connectives_dict().conn_category
    conn = constituent.connective

    # load dict
    tbl_con_str = NT_dict().dict_CON_Str
    tbl_con_lstr = NT_dict().dict_CON_LStr
    tbl_nt_ctx = NT_dict().dict_NT_Ctx
    tbl_path = NT_dict().dict_CON_NT_Path
    tbl_path_ilsib = NT_dict().dict_CON_NT_Path_iLsib

    # feature
    token_indices = conn.token_indices
    doc_id = conn.DocID
    sentence_no = conn.sent_index

    node = dict_util.get_conn_node(tree, token_indices)

    con_str = dict_util.get_CON_Str(parse_dict, doc_id, sentence_no, token_indices)
    con_lstr = con_str.lower()
    con_cat = category_of[conn.name]
    con_ilsib = dict_util.get_CON_iLSib(tree, node)
    con_irsib = dict_util.get_CON_iRSib(tree, node)
    nt_ctx = dict_util.get_NT_Ctx(constituent)
    path = dict_util.get_CON_NT_Path(node, constituent)
    position = dict_util.get_CON_NT_Position(node, constituent)
    path_ilsib = path + (":>1" if con_ilsib > 1 else ":<=1")

    # Each get_feature call gets its own fresh, empty feat dict.
    return util.mergeFeatures([
        get_feature({}, tbl_con_str, con_str),
        get_feature({}, tbl_con_lstr, con_lstr),
        get_feature({}, tbl_nt_ctx, nt_ctx),
        get_feature({}, tbl_path, path),
        get_feature({}, tbl_path_ilsib, path_ilsib),
        # cat
        get_feature(
            {}, {"subordinator": 1, "coordinator": 2, "adverbial": 3}, con_cat
        ),
        # number
        Feature("", 1, {1: con_ilsib}),
        Feature("", 1, {1: con_irsib}),
        # position
        get_feature({}, {"right": 1, "left": 2}, position),
    ])
Exemplo n.º 11
0
def dependency_rules(relation, parse_dict):
    """Build the dependency-rule features of a relation.

    Three features are produced from the same ``dict_dependency_rules``
    table with ``get_feature_by_feat_list`` -- one for the Arg1 rules, one
    for the Arg2 rules and one for the rules occurring in both -- and merged
    in that order.
    """
    rule_table = Non_Explicit_dict().dict_dependency_rules

    arg1_rules = dict_util.get_Arg_dependency_rules(relation, "Arg1", parse_dict)
    arg2_rules = dict_util.get_Arg_dependency_rules(relation, "Arg2", parse_dict)
    # Rules shared by both arguments (duplicates collapsed by the sets).
    shared_rules = list(set(arg1_rules) & set(arg2_rules))

    rule_lists = (arg1_rules, arg2_rules, shared_rules)
    return util.mergeFeatures(
        [get_feature_by_feat_list(rule_table, rules) for rules in rule_lists]
    )
Exemplo n.º 12
0
def all_features(arg_clauses, clause_index, parse_dict):
    """Merge the outputs of the clause feature extractors listed below.

    Every extractor is called with ``(arg_clauses, clause_index,
    parse_dict)``; the results are merged in listing order by
    ``util.mergeFeatures``.
    """
    extractors = (
        prev_curr_CP_production_rule,
        curr_last,
        is_NNP_WP,
        is_curr_NNP_prev_PRP_or_NNP,
        clause_word_num,
        prev2_pos_lemma_verb,
        lemma_verbs,
    )

    collected = []
    for extract in extractors:
        collected.append(extract(arg_clauses, clause_index, parse_dict))

    return util.mergeFeatures(collected)
Exemplo n.º 13
0
def all_features(arg_clauses, clause_index, parse_dict):
    """Merge the outputs of the clause feature extractors listed below.

    Every extractor is called with ``(arg_clauses, clause_index,
    parse_dict)``; the results are merged in listing order by
    ``util.mergeFeatures``.
    """
    extractors = (
        production_rule_list,
        curr_first,
        curr_first_prev_last_parse_path,
        next_first,
        conn_to_root_path,
        con_str,
        prev_last,
        curr_last_next_first,
        con_lstr,
        conn_connCtx,
        conn_to_root_compressed_path,
        CPOS,
        CParent_to_root_path_node_names,
        con_cat,
    )

    collected = []
    for extract in extractors:
        collected.append(extract(arg_clauses, clause_index, parse_dict))

    return util.mergeFeatures(collected)
def all_features(arg_clauses, clause_index, parse_dict):
    """Compute each listed clause feature and return them merged.

    The extractors all take ``(arg_clauses, clause_index, parse_dict)``.
    Their results are passed, in listing order, to ``util.mergeFeatures``.
    """
    extractors = (
        prev_curr_CP_production_rule,
        is_NNP_WP,
        is_curr_NNP_prev_PRP_or_NNP,
        clause_word_num,
        prev2_pos_lemma_verb,
        next_first,
        prev_last,
    )

    # One shared argument tuple for every extractor call.
    call_args = (arg_clauses, clause_index, parse_dict)
    outputs = []
    for extract in extractors:
        outputs.append(extract(*call_args))

    # merge features
    return util.mergeFeatures(outputs)
Exemplo n.º 15
0
def modality(relation, parse_dict):
    """Build the modality features of a relation.

    The modality vectors that ``dict_util.get_modality_vec`` returns for the
    Arg1 and Arg2 word lists, plus their ``util.cross_product``, are each
    converted with ``get_feature_by_list`` and merged in that order.
    """
    arg1_tokens = dict_util.get_Arg_Words_List(relation, "Arg1", parse_dict)
    arg2_tokens = dict_util.get_Arg_Words_List(relation, "Arg2", parse_dict)

    arg1_vec = dict_util.get_modality_vec(arg1_tokens)
    arg2_vec = dict_util.get_modality_vec(arg2_tokens)
    crossed = util.cross_product(arg1_vec, arg2_vec)

    return util.mergeFeatures(
        [get_feature_by_list(vec) for vec in (arg1_vec, arg2_vec, crossed)]
    )
Exemplo n.º 16
0
def all_features(relation, parse_dict):
    """Merge the outputs of the enabled relation-level feature extractors.

    Each extractor is called as ``extractor(relation, parse_dict)`` and the
    results are merged, in listing order, with ``mergeFeatures``.
    """
    # Extractors that were commented out of this list in the original code:
    # word_pairs, polarity.
    extractors = (
        production_rules,
        dependency_rules,
        firstlast_first3,
        modality,
        verbs,
        brown_cluster_pair,
        Inquirer,
        MPQA_polarity,
    )

    collected = []
    for extract in extractors:
        collected.append(extract(relation, parse_dict))

    # merge features
    return mergeFeatures(collected)
def modality(relation, parse_dict):
    """Return the merged modality features for Arg1, Arg2 and their product.

    Word lists for both arguments come from ``dict_util.get_Arg_Words_List``
    and are turned into modality vectors by ``dict_util.get_modality_vec``.
    The two vectors and their ``util.cross_product`` are converted with
    ``get_feature_by_list`` and merged, in that order.
    """
    words_arg1 = dict_util.get_Arg_Words_List(relation, "Arg1", parse_dict)
    words_arg2 = dict_util.get_Arg_Words_List(relation, "Arg2", parse_dict)

    vec_arg1 = dict_util.get_modality_vec(words_arg1)
    vec_arg2 = dict_util.get_modality_vec(words_arg2)
    vec_product = util.cross_product(vec_arg1, vec_arg2)

    merged_input = []
    for vector in (vec_arg1, vec_arg2, vec_product):
        merged_input.append(get_feature_by_list(vector))

    return util.mergeFeatures(merged_input)
def _all_features(parse_dict, connective):
    """Run every connective feature extractor and merge the results.

    The document id, sentence index and token indices are read from
    ``connective``; each extractor is then called as ``extractor(parse_dict,
    DocID, sent_index, conn_indices)``.  Results are merged in listing order
    with ``util.mergeFeatures``.
    """
    doc_id = connective.DocID
    sentence_no = connective.sent_index
    token_indices = connective.token_indices

    # Group labels are carried over from the original comments.
    extractors = (
        # Z.lin
        CString,
        CPOS,
        C_Prev,
        CLString,
        # Pitler
        self_category,
        parent_category,
        left_sibling_category,
        right_sibling_category,
        # conn - syn
        conn_self_category,
        conn_parent_category,
        conn_left_sibling_category,
        conn_right_sibling_category,
        # syn - syn
        self_parent,
        self_right,
        self_left,
        parent_left,
        parent_right,
        left_right,
        # mine
        conn_parent_category_ctx,
        as_prev_conn,
        as_prev_connPOS,
        when_prev_conn,
        when_prev_connPOS,
    )

    collected = []
    for extract in extractors:
        collected.append(extract(parse_dict, doc_id, sentence_no, token_indices))

    # merge features
    return util.mergeFeatures(collected)
def all_features(arg_clauses, clause_index, parse_dict):
    """Compute each listed clause feature and return them merged.

    The extractors all take ``(arg_clauses, clause_index, parse_dict)``.
    Their results are passed, in listing order, to ``util.mergeFeatures``.
    """
    extractors = (
        production_rule_list,
        curr_first,
        curr_first_prev_last_parse_path,
        next_first,
        conn_to_root_path,
        con_str,
        prev_last,
        curr_last_next_first,
        con_lstr,
        conn_connCtx,
        conn_to_root_compressed_path,
        CPOS,
        CParent_to_root_path_node_names,
        con_cat,
    )

    # One shared argument tuple for every extractor call.
    call_args = (arg_clauses, clause_index, parse_dict)
    outputs = []
    for extract in extractors:
        outputs.append(extract(*call_args))

    # merge features
    return util.mergeFeatures(outputs)
Exemplo n.º 20
0
def Arg_word2vec(relation, parse_dict):
    """Represent Arg1 and Arg2 of ``relation`` by averaged word vectors.

    For each argument, the distinct lower-cased lemma words returned by
    ``dict_util`` are looked up in the word2vec table.  The vectors found
    are summed with ``util.vec_plus_vec`` and divided by the number found;
    an argument with no known word keeps a 300-element zero vector.  Both
    vectors are converted with ``get_feature_by_list`` and merged, Arg1
    first.
    """
    # load dict
    embeddings = Non_Explicit_dict().word2vec_dict

    def mean_vector(words):
        # Average the embeddings of the distinct words present in the table.
        # NOTE(review): the 300-element start vector presumably matches the
        # embedding size -- confirm against the word2vec table.
        total = [0.0] * 300
        hits = 0
        for token in set(words):
            if token not in embeddings:
                continue
            total = util.vec_plus_vec(total, embeddings[token])
            hits += 1
        if hits != 0:
            total = [component / hits for component in total]
        return total

    # feature
    arg1_lemmas = dict_util._get_lower_case_lemma_words(relation, "Arg1",
                                                        parse_dict)
    arg2_lemmas = dict_util._get_lower_case_lemma_words(relation, "Arg2",
                                                        parse_dict)

    arg1_mean = mean_vector(arg1_lemmas)
    arg2_mean = mean_vector(arg2_lemmas)

    return util.mergeFeatures(
        [get_feature_by_list(arg1_mean), get_feature_by_list(arg2_mean)]
    )
def Arg_word2vec(relation, parse_dict):
    """Return merged averaged-embedding features for Arg1 and Arg2.

    The distinct lower-cased lemma words of each argument are looked up in
    the word2vec table; the vectors of the words found are accumulated with
    ``util.vec_plus_vec`` and divided by their count.  When no word of an
    argument is in the table, that argument's vector stays a 300-element
    list of zeros.  The Arg1 feature precedes the Arg2 feature in the merge.
    """
    # load dict
    word_vectors = Non_Explicit_dict().word2vec_dict

    def averaged(words):
        # Sum the vectors of the distinct known words, then divide by how
        # many were found (skipped when none were, leaving the zero vector).
        running_sum = [0.0] * 300
        found = 0
        for lemma in set(words):
            if lemma in word_vectors:
                running_sum = util.vec_plus_vec(running_sum, word_vectors[lemma])
                found += 1
        if found == 0:
            return running_sum
        return [value / found for value in running_sum]

    # feature
    lemmas_per_arg = [
        dict_util._get_lower_case_lemma_words(relation, role, parse_dict)
        for role in ("Arg1", "Arg2")
    ]
    vectors = [averaged(lemmas) for lemmas in lemmas_per_arg]

    return util.mergeFeatures([get_feature_by_list(vector) for vector in vectors])
def all_features(parse_dict, DocID, sent_index, conn_indices):
    """Build the merged feature for one connective occurrence.

    The connective string and POS, the two preceding tokens and their POS,
    several ``"<a>|<b>"`` combinations of those, the next-token features and
    the connective-to-root path are looked up in the ``Arg_position_dict``
    tables.  The result of ``C_Position_feature`` is inserted as the second
    feature, and all features are merged with ``util.mergeFeatures``.
    """
    # load dict
    tbl_conn = Arg_position_dict().dict_CString
    tbl_conn_pos = Arg_position_dict().dict_CPOS
    tbl_prev1 = Arg_position_dict().dict_prev1
    tbl_prev1_pos = Arg_position_dict().dict_prev1POS
    tbl_prev1_conn = Arg_position_dict().dict_prev1_C
    tbl_prev1_pos_conn_pos = Arg_position_dict().dict_prev1POS_CPOS
    tbl_prev2 = Arg_position_dict().dict_prev2
    tbl_prev2_pos = Arg_position_dict().dict_prev2POS
    tbl_prev2_conn = Arg_position_dict().dict_prev2_C
    tbl_prev2_pos_conn_pos = Arg_position_dict().dict_prev2POS_CPOS

    tbl_root_path = Arg_position_dict().dict_conn_to_root_path

    tbl_next1_pos_conn_pos = Arg_position_dict().dict_next1POS_CPOS
    tbl_next2 = Arg_position_dict().dict_next2

    # Every dict_util helper below takes the same four arguments.
    where = (parse_dict, DocID, sent_index, conn_indices)

    # feature
    conn = dict_util.get_C_String(*where)
    conn_pos = dict_util.get_CPOS(*where)
    prev1 = dict_util.get_prev1(*where)
    prev1_pos = dict_util.get_prev1POS(*where)
    prev2 = dict_util.get_prev2(*where)
    prev2_pos = dict_util.get_prev2POS(*where)

    prev1_conn = "%s|%s" % (prev1, conn)
    prev1_pos_conn_pos = "%s|%s" % (prev1_pos, conn_pos)

    prev2_conn = "%s|%s" % (prev2, conn)
    prev2_pos_conn_pos = "%s|%s" % (prev2_pos, conn_pos)

    # Only the POS of the next token and the second-next token are used.
    _, next1_pos = dict_util.get_next1_next1POS(*where)
    next2, _ = dict_util.get_next2_next2POS(*where)

    # The connective POS comes first in this key, unlike the prev*POS_CPOS keys.
    next1_pos_conn_pos = "%s|%s" % (conn_pos, next1_pos)

    root_path = dict_util.get_conn_to_root_path(*where)

    # Each get_feature call gets its own fresh, empty feat dict.
    return util.mergeFeatures([
        get_feature({}, tbl_conn, conn),
        # position feature
        C_Position_feature(parse_dict, DocID, sent_index, conn_indices),
        get_feature({}, tbl_conn_pos, conn_pos),
        get_feature({}, tbl_prev1, prev1),
        get_feature({}, tbl_prev1_pos, prev1_pos),
        get_feature({}, tbl_prev1_conn, prev1_conn),
        get_feature({}, tbl_prev1_pos_conn_pos, prev1_pos_conn_pos),
        get_feature({}, tbl_prev2, prev2),
        get_feature({}, tbl_prev2_pos, prev2_pos),
        get_feature({}, tbl_prev2_conn, prev2_conn),
        get_feature({}, tbl_prev2_pos_conn_pos, prev2_pos_conn_pos),
        get_feature({}, tbl_next1_pos_conn_pos, next1_pos_conn_pos),
        get_feature({}, tbl_next2, next2),
        get_feature_by_feat(tbl_root_path, root_path),
    ])
def all_features(parse_dict, DocID, sent_index, conn_indices):
    """Build the merged feature vector for one connective candidate.

    The individual features keep the grouping of the original code:
    lexical/POS context of the connective ("Z.Lin"), syntactic categories
    around it ("Pitler"), connective-syntax pairs, syntax-syntax pairs and a
    few additional path/context features ("mine").

    Args:
        parse_dict: parsed documents, indexed here as
            parse_dict[DocID]["sentences"][k]["words"][j] -> (token, info)
            with info["PartOfSpeech"], and
            parse_dict[DocID]["sentences"][k]["parsetree"].
        DocID: key of the document inside ``parse_dict``.
        sent_index: index of the sentence that contains the connective.
        conn_indices: token indices of the connective inside that sentence;
            must be non-empty because its first and last entries are used to
            locate the neighbouring tokens.

    Returns:
        The value of ``util.mergeFeatures`` applied to the individual
        features, always in the same order.
    """
    sentences = parse_dict[DocID]["sentences"]
    words = sentences[sent_index]["words"]

    # --- connective POS tags, joined with "_" ---
    CPOS = "_".join([words[idx][1]["PartOfSpeech"] for idx in conn_indices])

    # --- previous token: falls back to the last token of the previous
    #     sentence, or to the "NONE" sentinel at the start of the document ---
    prev_index = conn_indices[0] - 1
    prev_sent_index = sent_index
    has_prev = True
    if prev_index < 0:
        prev_index = -1  # last token of the previous sentence
        prev_sent_index -= 1
        if prev_sent_index < 0:
            has_prev = False

    if has_prev:
        prev = sentences[prev_sent_index]["words"][prev_index][0]
    else:
        prev = "NONE"

    # --- connective surface string ---
    conn_name = " ".join([words[idx][0] for idx in conn_indices])

    # NOTE(review): a real token spelled "NONE" is indistinguishable from the
    # sentinel here and also gets the POS "NONE". Kept unchanged so the
    # features stay consistent with the existing dictionaries.
    if prev == "NONE":
        prevPOS = "NONE"
    else:
        prevPOS = sentences[prev_sent_index]["words"][prev_index][1]["PartOfSpeech"]

    # --- next token: falls back to the first token of the next sentence,
    #     or to the "NONE" sentinel at the end of the document ---
    next_index = conn_indices[-1] + 1
    next_sent_index = sent_index
    has_next = True
    if next_index >= len(words):
        next_sent_index += 1
        next_index = 0
        if next_sent_index >= len(sentences):
            has_next = False

    if has_next:
        next_word = sentences[next_sent_index]["words"][next_index][0]
    else:
        next_word = "NONE"

    # Same sentinel caveat as for the previous token.
    if next_word == "NONE":
        nextPOS = "NONE"
    else:
        nextPOS = sentences[next_sent_index]["words"][next_index][1]["PartOfSpeech"]

    # --- syntax-tree based values ---
    parse_tree = sentences[sent_index]["parsetree"].strip()
    syntax_tree = Syntax_tree(parse_tree)

    if syntax_tree.tree is None:
        # No usable parse: every tree-derived value becomes "NONE_TREE".
        cparent_to_root_path = "NONE_TREE"
        compressed_path = "NONE_TREE"
        _path = "NONE_TREE"
        self_category = "NONE_TREE"
        parent_category = "NONE_TREE"
        left_sibling_category = "NONE_TREE"
        right_sibling_category = "NONE_TREE"
        right_sibling_node = None
        rightSiblingCtx = "NONE_TREE"
        parent_categoryCtx = "NONE_TREE"
    else:
        # Path from the parent of each connective token up to the root,
        # computed once and joined in the three ways the features need.
        root_paths = [
            syntax_tree.get_node_path_to_root(
                syntax_tree.get_leaf_node_by_token_index(idx).up)
            for idx in conn_indices
        ]
        cparent_to_root_path = "&".join(root_paths)
        compressed_path = "&".join(
            [util.get_compressed_path(path) for path in root_paths])
        _path = "-->".join(root_paths)

        # Pitler-style categories around the connective.
        self_category = syntax_tree.get_self_category_node_by_token_indices(
            conn_indices).name

        parent_node = syntax_tree.get_parent_category_node_by_token_indices(
            conn_indices)
        parent_category = "ROOT" if parent_node is None else parent_node.name

        left_sibling_node = \
            syntax_tree.get_left_sibling_category_node_by_token_indices(
                conn_indices)
        if left_sibling_node is None:
            left_sibling_category = "NONE"
        else:
            left_sibling_category = left_sibling_node.name

        right_sibling_node = \
            syntax_tree.get_right_sibling_category_node_by_token_indices(
                conn_indices)
        if right_sibling_node is None:
            right_sibling_category = "NONE"
        else:
            right_sibling_category = right_sibling_node.name

        # Context strings of the right sibling and of the parent category.
        rightSiblingCtx = dict_util.get_node_linked_Ctx(
            right_sibling_node, syntax_tree)
        parent_categoryCtx = dict_util.get_node_linked_Ctx(
            parent_node, syntax_tree)

    # --- combined string features ---
    prev_C = "%s|%s" % (prev, conn_name)
    prePOS_CPOS = "%s|%s" % (prevPOS, CPOS)
    C_next = "%s|%s" % (conn_name, next_word)
    CPOS_nextPOS = "%s|%s" % (CPOS, nextPOS)

    conn_self_category = "%s|%s" % (conn_name, self_category)
    conn_parent_category = "%s|%s" % (conn_name, parent_category)
    conn_left_sibling_category = "%s|%s" % (conn_name, left_sibling_category)
    conn_right_sibling_category = "%s|%s" % (conn_name, right_sibling_category)

    self_parent = "%s|%s" % (self_category, parent_category)
    self_right = "%s|%s" % (self_category, right_sibling_category)
    self_left = "%s|%s" % (self_category, left_sibling_category)
    parent_left = "%s|%s" % (parent_category, left_sibling_category)
    parent_right = "%s|%s" % (parent_category, right_sibling_category)
    left_right = "%s|%s" % (left_sibling_category, right_sibling_category)

    conn_lower_case = conn_name.lower()
    conn_rightSiblingCtx = "%s|%s" % (conn_name, rightSiblingCtx)
    conn_parent_categoryCtx = "%s|%s" % (conn_name, parent_categoryCtx)

    # Flag set when the right sibling subtree (the node itself included)
    # contains a node named "VP" or "S".
    right_sibling_contains_VP = {}
    if right_sibling_node is not None:
        subtree_nodes = right_sibling_node.get_descendants()
        subtree_nodes.append(right_sibling_node)
        if any(node.name in ("VP", "S") for node in subtree_nodes):
            right_sibling_contains_VP[1] = 1

    # --- assemble; the order below must not change ---
    features = []

    # Z.Lin
    features.append(get_feature({}, Connectives_dict().cpos_dict, CPOS))
    features.append(get_feature({}, Connectives_dict().prev_C_dict, prev_C))
    features.append(get_feature({}, Connectives_dict().prevPOS_dict, prevPOS))
    features.append(get_feature({}, Connectives_dict().prevPOS_CPOS_dict, prePOS_CPOS))
    features.append(get_feature({}, Connectives_dict().C_next_dict, C_next))
    features.append(get_feature({}, Connectives_dict().nextPOS_dict, nextPOS))
    features.append(get_feature({}, Connectives_dict().CPOS_nextPOS_dict, CPOS_nextPOS))
    features.append(get_feature(
        {}, Connectives_dict().CParent_to_root_path_dict, cparent_to_root_path))
    features.append(get_feature(
        {}, Connectives_dict().compressed_CParent_to_root_path_dict, compressed_path))

    # Pitler
    features.append(get_feature({}, Connectives_dict().self_category_dict, self_category))
    features.append(get_feature({}, Connectives_dict().parent_category_dict, parent_category))
    features.append(get_feature(
        {}, Connectives_dict().left_sibling_category_dict, left_sibling_category))
    features.append(get_feature(
        {}, Connectives_dict().right_sibling_category_dict, right_sibling_category))
    features.append(Feature("", 1, right_sibling_contains_VP))

    # conn-syn
    features.append(get_feature(
        {}, Connectives_dict().conn_self_category_dict, conn_self_category))
    features.append(get_feature(
        {}, Connectives_dict().conn_parent_category_dict, conn_parent_category))
    features.append(get_feature(
        {}, Connectives_dict().conn_left_sibling_category_dict, conn_left_sibling_category))
    features.append(get_feature(
        {}, Connectives_dict().conn_right_sibling_category_dict, conn_right_sibling_category))

    # syn-syn
    features.append(get_feature({}, Connectives_dict().self_parent_dict, self_parent))
    features.append(get_feature({}, Connectives_dict().self_right_dict, self_right))
    features.append(get_feature({}, Connectives_dict().self_left_dict, self_left))
    features.append(get_feature({}, Connectives_dict().parent_left_dict, parent_left))
    features.append(get_feature({}, Connectives_dict().parent_right_dict, parent_right))
    features.append(get_feature({}, Connectives_dict().left_right_dict, left_right))

    # mine
    features.append(get_feature_by_feat(
        Connectives_dict().dict_conn_lower_case, conn_lower_case))
    features.append(get_feature_by_feat(Connectives_dict().dict_conn, conn_name))
    # The "-->"-joined paths are split on "-->" again; presumably each path
    # itself uses "-->" between node names, so this yields single node
    # names -- TODO confirm against get_node_path_to_root.
    features.append(get_feature_by_feat_list(
        Connectives_dict().dict_CParent_to_root_path_node_names, _path.split("-->")))
    features.append(get_feature_by_feat(
        Connectives_dict().dict_conn_rightSiblingCtx, conn_rightSiblingCtx))
    features.append(get_feature_by_feat(
        Connectives_dict().dict_conn_parent_category_Ctx, conn_parent_categoryCtx))

    return util.mergeFeatures(features)
def all_features(parse_dict, constituent, i, constituents):
    """Assemble the feature vector of one constituent w.r.t. its connective.

    Dictionary-indexed string features (connective string, its lower-cased
    form, the constituent context, the connective-to-constituent path and
    that path tagged by the left-sibling value) are followed by the
    connective category, the two raw sibling values and the relative
    position of connective and constituent.

    Args:
        parse_dict: parsed documents, forwarded to ``dict_util.get_CON_Str``.
        constituent: object providing ``syntax_tree`` and ``connective``.
        i: not used by this function.
        constituents: not used by this function.

    Returns:
        The value of ``util.mergeFeatures`` over the features listed above.
    """
    tree = constituent.syntax_tree
    category_by_connective = Connectives_dict().conn_category
    connective = constituent.connective

    token_indices = connective.token_indices
    doc_id = connective.DocID
    sentence_no = connective.sent_index

    node = dict_util.get_conn_node(tree, token_indices)

    surface = dict_util.get_CON_Str(parse_dict, doc_id, sentence_no, token_indices)
    lowered = surface.lower()
    category = category_by_connective[connective.name]
    left_sibs = dict_util.get_CON_iLSib(tree, node)
    right_sibs = dict_util.get_CON_iRSib(tree, node)
    context = dict_util.get_NT_Ctx(constituent)
    path = dict_util.get_CON_NT_Path(node, constituent)
    position = dict_util.get_CON_NT_Position(node, constituent)

    # The path is additionally bucketed by whether the left-sibling value
    # exceeds one.
    path_with_lsib = path + (":>1" if left_sibs > 1 else ":<=1")

    # (lookup table, value) pairs, in output order.
    indexed = [
        (NT_dict().dict_CON_Str, surface),
        (NT_dict().dict_CON_LStr, lowered),
        (NT_dict().dict_NT_Ctx, context),
        (NT_dict().dict_CON_NT_Path, path),
        (NT_dict().dict_CON_NT_Path_iLsib, path_with_lsib),
        ({"subordinator": 1, "coordinator": 2, "adverbial": 3}, category),
    ]
    features = [get_feature({}, table, value) for table, value in indexed]

    # Sibling values go in as plain numeric features.
    features.append(Feature("", 1, {1: left_sibs}))
    features.append(Feature("", 1, {1: right_sibs}))

    # Relative position, encoded through a fixed two-entry table.
    features.append(get_feature({}, {"right": 1, "left": 2}, position))

    return util.mergeFeatures(features)
def all_features(parse_dict, connective):
    """Build the merged feature vector for an explicit connective.

    Feature groups, in output order: connective string / POS / previous
    token pair / lower-cased string, the four syntactic categories around
    the connective, connective-category pairs, category-category pairs, and
    finally the parent-category context plus the "as"/"when" previous
    connective features obtained from ``dict_util``.

    Args:
        parse_dict: parsed documents; the parse tree is read from
            parse_dict[DocID]["sentences"][sent_index]["parsetree"].
        connective: object providing ``DocID``, ``sent_index`` and
            ``token_indices``.

    Returns:
        The value of ``util.mergeFeatures`` over all features.
    """
    doc_id = connective.DocID
    sentence_no = connective.sent_index
    tokens = connective.token_indices
    # Shared positional arguments of the dict_util lookups below.
    where = (parse_dict, doc_id, sentence_no, tokens)

    # Lexical features.
    CString = dict_util.get_C_String(*where)
    CPOS = dict_util.get_CPOS(*where)
    C_Prev = "%s|%s" % (CString, dict_util.get_prev1(*where))
    CLString = CString.lower()

    # Syntactic categories taken from the sentence's parse tree.
    tree = Syntax_tree(
        parse_dict[doc_id]["sentences"][sentence_no]["parsetree"].strip())
    self_cat = dict_util.get_self_category(tree, tokens)
    parent_cat = dict_util.get_parent_category(tree, tokens)
    left_cat = dict_util.get_left_sibling_category(tree, tokens)
    right_cat = dict_util.get_right_sibling_category(tree, tokens)

    def pair(first, second):
        # Two values joined with "|", the pair format used by every
        # combined feature here.
        return "%s|%s" % (first, second)

    # Remaining context features.
    parent_ctx = dict_util.get_conn_parent_category_Ctx(*where)
    as_prev_conn = dict_util.get_as_prev_conn(*where)
    as_prev_connPOS = dict_util.get_as_prev_connPOS(*where)
    when_prev_conn = dict_util.get_when_prev_conn(*where)
    when_prev_connPOS = dict_util.get_when_prev_connPOS(*where)

    # (lookup table, value) pairs resolved through get_feature, in order.
    indexed = [
        (Explicit_dict().dict_CString, CString),
        (Explicit_dict().dict_CPOS, CPOS),
        (Explicit_dict().dict_C_Prev, C_Prev),
        (Explicit_dict().dict_CLString, CLString),
        # Pitler
        (Explicit_dict().self_category_dict, self_cat),
        (Explicit_dict().parent_category_dict, parent_cat),
        (Explicit_dict().left_sibling_category_dict, left_cat),
        (Explicit_dict().right_sibling_category_dict, right_cat),
        # conn-syn (the lower-cased connective is used as its name)
        (Explicit_dict().conn_self_category_dict, pair(CLString, self_cat)),
        (Explicit_dict().conn_parent_category_dict, pair(CLString, parent_cat)),
        (Explicit_dict().conn_left_sibling_category_dict, pair(CLString, left_cat)),
        (Explicit_dict().conn_right_sibling_category_dict, pair(CLString, right_cat)),
        # syn-syn
        (Explicit_dict().self_parent_dict, pair(self_cat, parent_cat)),
        (Explicit_dict().self_right_dict, pair(self_cat, right_cat)),
        (Explicit_dict().self_left_dict, pair(self_cat, left_cat)),
        (Explicit_dict().parent_left_dict, pair(parent_cat, left_cat)),
        (Explicit_dict().parent_right_dict, pair(parent_cat, right_cat)),
        (Explicit_dict().left_right_dict, pair(left_cat, right_cat)),
    ]
    # (lookup table, value) pairs resolved through get_feature_by_feat.
    by_feat = [
        (Explicit_dict().dict_conn_parent_category_ctx, parent_ctx),
        (Explicit_dict().dict_as_prev_conn, as_prev_conn),
        (Explicit_dict().dict_as_prev_connPOS, as_prev_connPOS),
        (Explicit_dict().dict_when_prev_conn, when_prev_conn),
        (Explicit_dict().dict_when_prev_connPOS, when_prev_connPOS),
    ]

    features = [get_feature({}, table, value) for table, value in indexed]
    features.extend(
        [get_feature_by_feat(table, value) for table, value in by_feat])

    return util.mergeFeatures(features)
def _all_features(arg_clauses, clause_index, parse_dict):
    """Build the merged feature vector for one clause of ``arg_clauses``.

    Every raw value comes from a ``dict_util`` helper called with the same
    ``(arg_clauses, clause_index, parse_dict)`` triple. Output order: verb
    lists, first/last word features of the current and neighbouring clauses
    and their pairs, clause position, production rules, then the
    connective-related features.

    Args:
        arg_clauses: clause container forwarded to the helpers.
        clause_index: index of the clause being described.
        parse_dict: parsed documents forwarded to the helpers.

    Returns:
        The value of ``util.mergeFeatures`` over all features.
    """
    # Argument triple shared by all dict_util helpers.
    ctx = (arg_clauses, clause_index, parse_dict)
    lexicon = Ps_arg2_dict

    features = []
    add = features.append

    # Verbs of the current clause (lower-cased and lemmatised lists).
    add(get_feature_by_feat_list(
        lexicon().dict_lowercase_verbs,
        dict_util.get_curr_lowercased_verbs(*ctx)))
    add(get_feature_by_feat_list(
        lexicon().dict_lemma_verbs,
        dict_util.get_curr_lemma_verbs(*ctx)))

    # Boundary words of this clause and of its neighbours.
    curr_first = dict_util.get_curr_first(*ctx)
    curr_last = dict_util.get_curr_last(*ctx)
    prev_last = dict_util.get_prev_last(*ctx)
    next_first = dict_util.get_next_first(*ctx)

    add(get_feature_by_feat(lexicon().dict_curr_first, curr_first))
    add(get_feature_by_feat(lexicon().dict_curr_last, curr_last))
    add(get_feature_by_feat(lexicon().dict_prev_last, prev_last))
    add(get_feature_by_feat(lexicon().dict_next_first, next_first))
    add(get_feature_by_feat(
        lexicon().dict_prev_last_curr_first,
        "%s_%s" % (prev_last, curr_first)))
    add(get_feature_by_feat(
        lexicon().dict_curr_last_next_first,
        "%s_%s" % (curr_last, next_first)))

    # Position of the current clause, via a fixed three-entry table.
    add(get_feature_by_feat(
        {"left": 1, "middle": 2, "right": 3},
        dict_util.get_curr_position(*ctx)))

    # Production rules of the current clause.
    add(get_feature_by_feat_list(
        lexicon().dict_curr_production_rule,
        dict_util.get_curr_production_rule(*ctx)))

    # Connective-related features.
    add(get_feature_by_feat(
        lexicon().dict_con_str, dict_util.get_con_str(*ctx)))
    add(get_feature_by_feat(
        lexicon().dict_con_lstr, dict_util.get_con_lstr(*ctx)))
    add(get_feature_by_feat(
        {"subordinator": 1, "coordinator": 2, "adverbial": 3},
        dict_util.get_con_cat(*ctx)))
    add(get_feature_by_feat(
        lexicon().dict_conn_to_root_path,
        dict_util.get_conn_to_root_path(*ctx)))
    add(get_feature_by_feat(
        lexicon().dict_conn_to_root_compressed_path,
        dict_util.get_conn_to_root_compressed_path(*ctx)))
    add(get_feature_by_feat(
        lexicon().dict_conn_position,
        dict_util.get_conn_position(*ctx)))

    return util.mergeFeatures(features)
Exemplo n.º 27
0
def all_features(parse_dict, DocID, sent_index, conn_indices):
    # feat dict
    '''Z.Lin'''
    feat_dict_CPOS_dict = {}
    feat_dict_prev_C_dict = {}
    feat_dict_prevPOS_dict = {}
    feat_dict_prevPOS_CPOS_dict = {}
    feat_dict_C_next_dict = {}
    feat_dict_nextPOS_dict = {}
    feat_dict_CPOS_nextPOS_dict = {}
    feat_dict_CParent_to_root_path_dict = {}
    feat_dict_compressed_CParent_to_root_path_dict = {}
    '''Pitler'''
    feat_dict_self_category_dict = {}
    feat_dict_parent_category_dict = {}
    feat_dict_left_sibling_category_dict = {}
    feat_dict_right_sibling_category_dict = {}
    ''' conn_syn '''
    feat_dict_conn_self_category_dict = {}
    feat_dict_conn_parent_category_dict = {}
    feat_dict_conn_left_sibling_category_dict = {}
    feat_dict_conn_right_sibling_category_dict = {}
    ''' syn_syn '''
    feat_dict_self_parent = {}
    feat_dict_self_right = {}
    feat_dict_self_left = {}
    feat_dict_parent_left = {}
    feat_dict_parent_right = {}
    feat_dict_left_right = {}

    #dict
    '''Z.Lin'''
    CPOS_dict = Connectives_dict().cpos_dict
    prev_C_dict = Connectives_dict().prev_C_dict
    prevPOS_dict = Connectives_dict().prevPOS_dict
    prevPOS_CPOS_dict = Connectives_dict().prevPOS_CPOS_dict
    C_next_dict = Connectives_dict().C_next_dict
    nextPOS_dict = Connectives_dict().nextPOS_dict
    CPOS_nextPOS_dict = Connectives_dict().CPOS_nextPOS_dict
    CParent_to_root_path_dict = Connectives_dict().CParent_to_root_path_dict
    compressed_CParent_to_root_path_dict = Connectives_dict(
    ).compressed_CParent_to_root_path_dict
    '''Pitler'''
    self_category_dict = Connectives_dict().self_category_dict
    parent_category_dict = Connectives_dict().parent_category_dict
    left_sibling_category_dict = Connectives_dict().left_sibling_category_dict
    right_sibling_category_dict = Connectives_dict(
    ).right_sibling_category_dict
    ''' conn_syn '''
    conn_self_category_dict = Connectives_dict().conn_self_category_dict
    conn_parent_category_dict = Connectives_dict().conn_parent_category_dict
    conn_left_sibling_category_dict = Connectives_dict(
    ).conn_left_sibling_category_dict
    conn_right_sibling_category_dict = Connectives_dict(
    ).conn_right_sibling_category_dict
    ''' syn_syn '''
    self_parent_dict = Connectives_dict().self_parent_dict
    self_right_dict = Connectives_dict().self_right_dict
    self_left_dict = Connectives_dict().self_left_dict
    parent_left_dict = Connectives_dict().parent_left_dict
    parent_right_dict = Connectives_dict().parent_right_dict
    left_right_dict = Connectives_dict().left_right_dict
    ''' mine '''
    dict_conn_lower_case = Connectives_dict().dict_conn_lower_case
    dict_conn = Connectives_dict().dict_conn
    dict_CParent_to_root_path_node_names = Connectives_dict(
    ).dict_CParent_to_root_path_node_names
    dict_conn_rightSiblingCtx = Connectives_dict().dict_conn_rightSiblingCtx
    dict_conn_parent_category_Ctx = Connectives_dict(
    ).dict_conn_parent_category_Ctx
    ''' c pos '''
    pos_tag_list = []
    for conn_index in conn_indices:
        pos_tag_list.append(parse_dict[DocID]["sentences"][sent_index]["words"]
                            [conn_index][1]["PartOfSpeech"])
    CPOS = "_".join(pos_tag_list)
    ''' prev '''
    flag = 0
    prev_index = conn_indices[0] - 1
    prev_sent_index = sent_index
    if prev_index < 0:
        prev_index = -1
        prev_sent_index -= 1
        if prev_sent_index < 0:
            flag = 1

    if flag == 1:
        prev = "NONE"
    else:
        prev = parse_dict[DocID]["sentences"][prev_sent_index]["words"][
            prev_index][0]
    ''' conn_name '''
    conn_name = " ".join([parse_dict[DocID]["sentences"][sent_index]["words"][word_token][0] \
                  for word_token in conn_indices ])
    '''prevPOS'''
    if prev == "NONE":
        prevPOS = "NONE"
    else:
        prevPOS = parse_dict[DocID]["sentences"][prev_sent_index]["words"][
            prev_index][1]["PartOfSpeech"]
    '''next'''
    sent_count = len(parse_dict[DocID]["sentences"])
    sent_length = len(parse_dict[DocID]["sentences"][sent_index]["words"])

    flag = 0
    next_index = conn_indices[-1] + 1
    next_sent_index = sent_index
    if next_index >= sent_length:
        next_sent_index += 1
        next_index = 0
        if next_sent_index >= sent_count:
            flag = 1

    if flag == 1:
        next = "NONE"
    else:
        next = parse_dict[DocID]["sentences"][next_sent_index]["words"][
            next_index][0]
    ''' next pos '''
    if next == "NONE":
        nextPOS = "NONE"
    else:
        nextPOS = parse_dict[DocID]["sentences"][next_sent_index]["words"][
            next_index][1]["PartOfSpeech"]

    parse_tree = parse_dict[DocID]["sentences"][sent_index]["parsetree"].strip(
    )
    syntax_tree = Syntax_tree(parse_tree)
    ''' c parent to root '''
    if syntax_tree.tree == None:
        cparent_to_root_path = "NONE_TREE"
    else:
        cparent_to_root_path = ""
        for conn_index in conn_indices:
            conn_node = syntax_tree.get_leaf_node_by_token_index(conn_index)
            conn_parent_node = conn_node.up
            cparent_to_root_path += syntax_tree.get_node_path_to_root(
                conn_parent_node) + "&"
        if cparent_to_root_path[-1] == "&":
            cparent_to_root_path = cparent_to_root_path[:-1]
    ''' compressed c parent to root '''
    if syntax_tree.tree == None:
        compressed_path = "NONE_TREE"
    else:
        compressed_path = ""
        for conn_index in conn_indices:
            conn_node = syntax_tree.get_leaf_node_by_token_index(conn_index)
            conn_parent_node = conn_node.up

            path = syntax_tree.get_node_path_to_root(conn_parent_node)

            compressed_path += util.get_compressed_path(path) + "&"

        if compressed_path[-1] == "&":
            compressed_path = compressed_path[:-1]
    ''' Pitler '''
    if syntax_tree.tree == None:
        self_category = "NONE_TREE"
    else:
        self_category = syntax_tree.get_self_category_node_by_token_indices(
            conn_indices).name

    if syntax_tree.tree == None:
        parent_category = "NONE_TREE"
    else:
        parent_category_node = syntax_tree.get_parent_category_node_by_token_indices(
            conn_indices)
        if parent_category_node == None:
            parent_category = "ROOT"
        else:
            parent_category = parent_category_node.name

    if syntax_tree.tree == None:
        left_sibling_category = "NONE_TREE"
    else:
        left_sibling_category_node = syntax_tree.get_left_sibling_category_node_by_token_indices(
            conn_indices)
        if left_sibling_category_node == None:
            left_sibling_category = "NONE"
        else:
            left_sibling_category = left_sibling_category_node.name

    if syntax_tree.tree == None:
        right_sibling_category = "NONE_TREE"
    else:
        right_sibling_category_node = syntax_tree.get_right_sibling_category_node_by_token_indices(
            conn_indices)
        if right_sibling_category_node == None:
            right_sibling_category = "NONE"
        else:
            right_sibling_category = right_sibling_category_node.name

    prev_C = "%s|%s" % (prev, conn_name)
    prePOS_CPOS = "%s|%s" % (prevPOS, CPOS)
    C_next = "%s|%s" % (conn_name, next)
    CPOS_nextPOS = "%s|%s" % (CPOS, nextPOS)

    conn_self_category = "%s|%s" % (conn_name, self_category)
    conn_parent_category = "%s|%s" % (conn_name, parent_category)
    conn_left_sibling_category = "%s|%s" % (conn_name, left_sibling_category)
    conn_right_sibling_category = "%s|%s" % (conn_name, right_sibling_category)

    self_parent = "%s|%s" % (self_category, parent_category)
    self_right = "%s|%s" % (self_category, right_sibling_category)
    self_left = "%s|%s" % (self_category, left_sibling_category)
    parent_left = "%s|%s" % (parent_category, left_sibling_category)
    parent_right = "%s|%s" % (parent_category, right_sibling_category)
    left_right = "%s|%s" % (left_sibling_category, right_sibling_category)
    '''--- mine ---'''
    conn_lower_case = conn_name.lower()
    # prevPOS_C = "%s|%s" % (prevPOS, conn_name.lower())
    if syntax_tree.tree == None:
        _path = "NONE_TREE"
    else:
        _path = ""
        for conn_index in conn_indices:
            conn_node = syntax_tree.get_leaf_node_by_token_index(conn_index)
            conn_parent_node = conn_node.up
            _path += syntax_tree.get_node_path_to_root(
                conn_parent_node) + "-->"
        if _path[-3:] == "-->":
            _path = _path[:-3]

    # conn + connCtx
    if syntax_tree.tree == None:
        connCtx = "NONE_TREE"
    else:
        conn_node = syntax_tree.get_self_category_node_by_token_indices(
            conn_indices)
        connCtx = dict_util.get_node_Ctx(conn_node, syntax_tree)

    conn_connCtx = "%s|%s" % (conn_name, connCtx)

    # conn + right sibling ctx
    if syntax_tree.tree == None:
        rightSiblingCtx = "NONE_TREE"
    else:
        rightSibling_node = syntax_tree.get_right_sibling_category_node_by_token_indices(
            conn_indices)
        rightSiblingCtx = dict_util.get_node_linked_Ctx(
            rightSibling_node, syntax_tree)

    conn_rightSiblingCtx = "%s|%s" % (conn_name, rightSiblingCtx)

    # conn _ left sibling ctx
    if syntax_tree.tree == None:
        leftSiblingCtx = "NONE_TREE"
    else:
        leftSibling_node = syntax_tree.get_left_sibling_category_node_by_token_indices(
            conn_indices)
        leftSiblingCtx = dict_util.get_node_linked_Ctx(leftSibling_node,
                                                       syntax_tree)

    # conn parent category ctx
    if syntax_tree.tree == None:
        parent_categoryCtx = "NONE_TREE"
    else:
        parent_category_node = syntax_tree.get_parent_category_node_by_token_indices(
            conn_indices)
        parent_categoryCtx = dict_util.get_node_linked_Ctx(
            parent_category_node, syntax_tree)

    conn_parent_categoryCtx = "%s|%s" % (conn_name, parent_categoryCtx)

    features = []
    '''Z.Lin'''
    features.append(get_feature(feat_dict_CPOS_dict, CPOS_dict, CPOS))
    features.append(get_feature(feat_dict_prev_C_dict, prev_C_dict, prev_C))
    features.append(get_feature(feat_dict_prevPOS_dict, prevPOS_dict, prevPOS))
    features.append(
        get_feature(feat_dict_prevPOS_CPOS_dict, prevPOS_CPOS_dict,
                    prePOS_CPOS))
    features.append(get_feature(feat_dict_C_next_dict, C_next_dict, C_next))
    features.append(get_feature(feat_dict_nextPOS_dict, nextPOS_dict, nextPOS))
    features.append(
        get_feature(feat_dict_CPOS_nextPOS_dict, CPOS_nextPOS_dict,
                    CPOS_nextPOS))
    features.append(
        get_feature(feat_dict_CParent_to_root_path_dict,
                    CParent_to_root_path_dict, cparent_to_root_path))
    features.append(
        get_feature(feat_dict_compressed_CParent_to_root_path_dict,
                    compressed_CParent_to_root_path_dict, compressed_path))
    ''' pitler '''
    features.append(
        get_feature(feat_dict_self_category_dict, self_category_dict,
                    self_category))
    features.append(
        get_feature(feat_dict_parent_category_dict, parent_category_dict,
                    parent_category))
    features.append(
        get_feature(feat_dict_left_sibling_category_dict,
                    left_sibling_category_dict, left_sibling_category))
    features.append(
        get_feature(feat_dict_right_sibling_category_dict,
                    right_sibling_category_dict, right_sibling_category))

    feat_dict_is_right_sibling_contains_VP = {}
    if syntax_tree.tree != None and right_sibling_category_node != None:
        T = right_sibling_category_node.get_descendants()
        T.append(right_sibling_category_node)
        for node in T:
            if node.name == "VP" or node.name == "S":
                feat_dict_is_right_sibling_contains_VP[1] = 1
                break
    features.append(Feature("", 1, feat_dict_is_right_sibling_contains_VP))
    ''' conn-syn '''
    features.append(
        get_feature(feat_dict_conn_self_category_dict, conn_self_category_dict,
                    conn_self_category))
    features.append(
        get_feature(feat_dict_conn_parent_category_dict,
                    conn_parent_category_dict, conn_parent_category))
    features.append(
        get_feature(feat_dict_conn_left_sibling_category_dict,
                    conn_left_sibling_category_dict,
                    conn_left_sibling_category))
    features.append(
        get_feature(feat_dict_conn_right_sibling_category_dict,
                    conn_right_sibling_category_dict,
                    conn_right_sibling_category))
    ''' syn-syn '''

    features.append(
        get_feature(feat_dict_self_parent, self_parent_dict, self_parent))
    features.append(
        get_feature(feat_dict_self_right, self_right_dict, self_right))
    features.append(get_feature(feat_dict_self_left, self_left_dict,
                                self_left))
    features.append(
        get_feature(feat_dict_parent_left, parent_left_dict, parent_left))
    features.append(
        get_feature(feat_dict_parent_right, parent_right_dict, parent_right))
    features.append(
        get_feature(feat_dict_left_right, left_right_dict, left_right))
    ''' mine '''
    features.append(get_feature_by_feat(dict_conn_lower_case, conn_lower_case))
    features.append(get_feature_by_feat(dict_conn, conn_name))

    features.append(
        get_feature_by_feat_list(dict_CParent_to_root_path_node_names,
                                 _path.split("-->")))
    features.append(
        get_feature_by_feat(dict_conn_rightSiblingCtx, conn_rightSiblingCtx))
    features.append(
        get_feature_by_feat(dict_conn_parent_category_Ctx,
                            conn_parent_categoryCtx))

    return util.mergeFeatures(features)
Exemplo n.º 28
0
def _all_features(arg_clauses, clause_index, parse_dict):
    """Build the merged feature for one clause.

    Every feature value is produced by a ``dict_util`` helper that receives
    ``(arg_clauses, clause_index, parse_dict)`` unchanged.  Each value is then
    paired with its lookup dictionary and converted with
    ``get_feature_by_feat`` (single value) or ``get_feature_by_feat_list``
    (list of values).  The resulting features are combined, in a fixed order,
    by ``util.mergeFeatures``.

    Args:
        arg_clauses: forwarded as-is to the ``dict_util`` helpers.
        clause_index: forwarded as-is to the ``dict_util`` helpers
            (presumably the index of the current clause -- confirm against
            the caller).
        parse_dict: forwarded as-is to the ``dict_util`` helpers.

    Returns:
        The result of ``util.mergeFeatures`` applied to the 16 features
        built below.
    """
    # Fetch the lookup-table holder once instead of calling the constructor
    # separately for every attribute.
    # NOTE(review): assumes Ps_arg2_dict() yields equivalent lookup tables on
    # every call (e.g. it is a singleton or a pure loader) -- confirm.
    lookup = Ps_arg2_dict()

    # Small closed vocabularies that are defined inline rather than loaded.
    dict_position = {"left": 1, "middle": 2, "right": 3}
    dict_con_cat = {"subordinator": 1, "coordinator": 2, "adverbial": 3}

    # --- feature values -------------------------------------------------
    lowercase_verbs_list = dict_util.get_curr_lowercased_verbs(
        arg_clauses, clause_index, parse_dict)
    lemma_verbs_list = dict_util.get_curr_lemma_verbs(
        arg_clauses, clause_index, parse_dict)

    curr_first = dict_util.get_curr_first(
        arg_clauses, clause_index, parse_dict)
    curr_last = dict_util.get_curr_last(arg_clauses, clause_index, parse_dict)
    prev_last = dict_util.get_prev_last(arg_clauses, clause_index, parse_dict)
    next_first = dict_util.get_next_first(
        arg_clauses, clause_index, parse_dict)
    # Pair features that join the values across each clause boundary.
    prev_last_curr_first = "%s_%s" % (prev_last, curr_first)
    curr_last_next_first = "%s_%s" % (curr_last, next_first)

    # the position of the current clause
    position = dict_util.get_curr_position(
        arg_clauses, clause_index, parse_dict)

    production_rule_list = dict_util.get_curr_production_rule(
        arg_clauses, clause_index, parse_dict)

    # Connective-related values (the block the original author marked "mine").
    con_str = dict_util.get_con_str(arg_clauses, clause_index, parse_dict)
    con_lstr = dict_util.get_con_lstr(arg_clauses, clause_index, parse_dict)
    con_cat = dict_util.get_con_cat(arg_clauses, clause_index, parse_dict)
    conn_to_root_path = dict_util.get_conn_to_root_path(
        arg_clauses, clause_index, parse_dict)
    conn_to_root_compressed_path = dict_util.get_conn_to_root_compressed_path(
        arg_clauses, clause_index, parse_dict)
    conn_position = dict_util.get_conn_position(
        arg_clauses, clause_index, parse_dict)

    # --- feature objects --------------------------------------------------
    # The order of this list is kept exactly as before, since it is what
    # util.mergeFeatures receives.
    features = [
        get_feature_by_feat_list(lookup.dict_lowercase_verbs,
                                 lowercase_verbs_list),
        get_feature_by_feat_list(lookup.dict_lemma_verbs, lemma_verbs_list),
        get_feature_by_feat(lookup.dict_curr_first, curr_first),
        get_feature_by_feat(lookup.dict_curr_last, curr_last),
        get_feature_by_feat(lookup.dict_prev_last, prev_last),
        get_feature_by_feat(lookup.dict_next_first, next_first),
        get_feature_by_feat(lookup.dict_prev_last_curr_first,
                            prev_last_curr_first),
        get_feature_by_feat(lookup.dict_curr_last_next_first,
                            curr_last_next_first),
        get_feature_by_feat(dict_position, position),
        # production rules
        get_feature_by_feat_list(lookup.dict_curr_production_rule,
                                 production_rule_list),
        # connective features
        get_feature_by_feat(lookup.dict_con_str, con_str),
        get_feature_by_feat(lookup.dict_con_lstr, con_lstr),
        get_feature_by_feat(dict_con_cat, con_cat),
        get_feature_by_feat(lookup.dict_conn_to_root_path, conn_to_root_path),
        get_feature_by_feat(lookup.dict_conn_to_root_compressed_path,
                            conn_to_root_compressed_path),
        get_feature_by_feat(lookup.dict_conn_position, conn_position),
    ]

    return util.mergeFeatures(features)