Пример #1
0
def extract_features(wordseg, ql, qr, tfidf_count_hash_vectorModels):
    """Build count/tfidf/hash features for a sentence pair.

    Args:
        wordseg: segmenter exposing ``segment(text)`` whose result has
            ``basic_words`` (project type — TODO confirm interface).
        ql: left query string.
        qr: right query string.
        tfidf_count_hash_vectorModels: fitted vectorizer models passed
            through to ``get_tfidf_count_hash_features``.

    Returns:
        dict mapping feature names to values, combining the plain-sentence
        features and the notional-token ("notion"-signed) features.
    """
    sent_l = Sentence()
    sent_l.raw_form = ql
    sent_l.base_form = ql
    wordseg_out = wordseg.segment(ql)
    sent_l.basic_words = wordseg_out.basic_words

    sent_r = Sentence()
    sent_r.raw_form = qr
    sent_r.base_form = qr
    wordseg_out = wordseg.segment(qr)
    sent_r.basic_words = wordseg_out.basic_words

    # Content-word-only views of both sentences for the "notion" variant.
    l_notion = get_notional_tokens(sent_l)
    r_notion = get_notional_tokens(sent_r)

    feature_dict = {}

    count_tfidf_hash_features = get_tfidf_count_hash_features(
        sent_l, sent_r, tfidf_count_hash_vectorModels)
    feature_dict.update(count_tfidf_hash_features)

    notion_count_tfidf_hash_features = get_tfidf_count_hash_features(
        l_notion, r_notion, tfidf_count_hash_vectorModels, "notion")
    feature_dict.update(notion_count_tfidf_hash_features)

    # NOTE(review): removed a leftover debug loop that printed every
    # feature key to stdout on each call.
    return feature_dict
Пример #2
0
def extract_features(wordseg, ql, qr, tfidf_count_hash_vectorModels,
                     sent_word2vec, sent_vocab_dict, sent_model):
    """Combine lexical, count/tfidf/hash, and sentence-vector features
    for the query pair (ql, qr) into a single feature dict."""

    def _make_sentence(text):
        # Wrap raw text in a Sentence and attach its segmentation.
        sentence = Sentence()
        sentence.raw_form = text
        sentence.base_form = text
        sentence.basic_words = wordseg.segment(text).basic_words
        return sentence

    sent_l = _make_sentence(ql)
    sent_r = _make_sentence(qr)

    feature_dict = {}
    feature_dict.update(calc_lexical_features(sent_l, sent_r))
    feature_dict.update(get_tfidf_count_hash_features(
        sent_l, sent_r, tfidf_count_hash_vectorModels))
    feature_dict.update(get_sentvec_features(
        sent_word2vec, sent_vocab_dict, sent_model, sent_l, sent_r))
    return feature_dict
Пример #3
0
def extract_features(wordseg, ql, qr):
    """Compute lexical features for (ql, qr), plus lexical features over
    each sentence's periphery relative to the opposite query."""

    def _make_sentence(text):
        # Wrap raw text in a Sentence and attach its segmentation.
        sentence = Sentence()
        sentence.raw_form = text
        sentence.base_form = text
        sentence.basic_words = wordseg.segment(text).basic_words
        return sentence

    sent_l = _make_sentence(ql)
    sent_r = _make_sentence(qr)

    # Periphery of each sentence is computed w.r.t. the *other* raw query.
    l_periph = get_periph(sent_l, qr)
    r_periph = get_periph(sent_r, ql)

    feature_dict = {}
    feature_dict.update(calc_lexical_features(sent_l, sent_r))
    feature_dict.update(calc_periph_lexical_features(l_periph, r_periph))
    return feature_dict
Пример #4
0
def extract_features(wordseg, ql, qr, sent_word2vec, word_weights, sent_model):
    """Build sentence-vector features for (ql, qr), both on the full
    sentences and on their notional-token views.

    Args:
        wordseg: segmenter exposing ``segment(text)`` with ``basic_words``.
        ql: left query string.
        qr: right query string.
        sent_word2vec: word-embedding model for ``get_sentvec_features``.
        word_weights: per-word weights for sentence-vector pooling
            (presumably — TODO confirm against get_sentvec_features).
        sent_model: sentence-level model passed through unchanged.

    Returns:
        dict of sentence-vector features, with the notional-token variant
        keyed under the "notion" signature.
    """
    sent_l = Sentence()
    sent_l.raw_form = ql
    sent_l.base_form = ql
    wordseg_out = wordseg.segment(ql)
    sent_l.basic_words = wordseg_out.basic_words

    sent_r = Sentence()
    sent_r.raw_form = qr
    sent_r.base_form = qr
    wordseg_out = wordseg.segment(qr)
    sent_r.basic_words = wordseg_out.basic_words

    l_notion = get_notional_tokens(sent_l)
    r_notion = get_notional_tokens(sent_r)

    feature_dict = {}

    sentvec_features = get_sentvec_features(sent_word2vec, word_weights,
                                            sent_model, sent_l, sent_r)
    feature_dict.update(sentvec_features)
    # BUG FIX: the "notion" variant previously re-used sent_l/sent_r, so the
    # notional features duplicated the plain ones; pass the notional views,
    # matching the pattern used by the fuller extract_features variants.
    sentvec_features = get_sentvec_features(sent_word2vec, word_weights,
                                            sent_model, l_notion, r_notion,
                                            "notion")
    feature_dict.update(sentvec_features)

    # NOTE(review): removed a leftover debug loop that printed every
    # feature key/value pair to stdout on each call.
    return feature_dict
Пример #5
0
def extract_features(wordseg, ql, qr, tfidf_count_hash_vectorModels,
                     sent_word2vec, sent_vocab_dict, sent_model, ner_dict,
                     syn_dict):
    """Full feature extraction for the query pair (ql, qr).

    Aggregates, in order: lexical features, periphery lexical features,
    MT features, count/tfidf/hash features (plain and notional),
    sentence-vector features (plain and notional), and NER features.
    """

    def _make_sentence(text):
        # Wrap raw text in a Sentence and attach its segmentation.
        sentence = Sentence()
        sentence.raw_form = text
        sentence.base_form = text
        sentence.basic_words = wordseg.segment(text).basic_words
        return sentence

    sent_l = _make_sentence(ql)
    sent_r = _make_sentence(qr)

    # Content-word-only views for the "notion"-signed feature variants.
    l_notion = get_notional_tokens(sent_l)
    r_notion = get_notional_tokens(sent_r)

    # Periphery of each sentence is computed w.r.t. the *other* raw query.
    l_periph = get_periph(sent_l, qr)
    r_periph = get_periph(sent_r, ql)

    feature_dict = {}

    feature_dict.update(calc_lexical_features(sent_l, sent_r))
    feature_dict.update(calc_periph_lexical_features(l_periph, r_periph))

    feature_dict.update(calc_mt_features(sent_l, sent_r))

    feature_dict.update(get_tfidf_count_hash_features(
        sent_l, sent_r, tfidf_count_hash_vectorModels))
    feature_dict.update(get_tfidf_count_hash_features(
        l_notion, r_notion, tfidf_count_hash_vectorModels,
        signature="notion"))

    feature_dict.update(get_sentvec_features(
        sent_word2vec, sent_vocab_dict, sent_model, sent_l, sent_r))
    feature_dict.update(get_sentvec_features(
        sent_word2vec, sent_vocab_dict, sent_model, l_notion, r_notion,
        signature="notion"))

    feature_dict.update(get_ner_features(sent_l, sent_r, ner_dict, syn_dict))

    return feature_dict
Пример #6
0
def extract_features(wordseg, ql, qr, ner_dict, syn_dict):
    """Compute only the named-entity features for the query pair (ql, qr)."""

    def _make_sentence(text):
        # Wrap raw text in a Sentence and attach its segmentation.
        sentence = Sentence()
        sentence.raw_form = text
        sentence.base_form = text
        sentence.basic_words = wordseg.segment(text).basic_words
        return sentence

    sent_l = _make_sentence(ql)
    sent_r = _make_sentence(qr)

    feature_dict = {}
    feature_dict.update(get_ner_features(sent_l, sent_r, ner_dict, syn_dict))
    return feature_dict
Пример #7
0
def split_ql_qr(wordseg, ql, qr):
    """Segment both queries and return their token lists.

    Args:
        wordseg: segmenter exposing ``segment(text)`` whose result has
            ``basic_words`` items with a ``.term`` attribute.
        ql: left query string.
        qr: right query string.

    Returns:
        A pair ``(tokenized_ql, tokenized_qr)``; each is a list containing
        a single list of term strings (the nesting is preserved for
        callers that expect a batch-of-one).
    """
    sent_l = Sentence()
    sent_l.raw_form = ql
    sent_l.base_form = ql
    wordseg_out = wordseg.segment(ql)
    sent_l.basic_words = wordseg_out.basic_words
    # Comprehension replaces the manual append loop (same output).
    tokenized_ql = [[item.term for item in sent_l.basic_words]]

    sent_r = Sentence()
    sent_r.raw_form = qr
    sent_r.base_form = qr
    wordseg_out = wordseg.segment(qr)
    sent_r.basic_words = wordseg_out.basic_words
    tokenized_qr = [[item.term for item in sent_r.basic_words]]

    return tokenized_ql, tokenized_qr