Exemplo n.º 1
0
def generate_features(stances, dataset, name, bow_vectorizer, tfreq_vectorizer,
                      tfidf_vectorizer):
    """Assemble the feature matrix and label list for ``stances``.

    Args:
        stances: iterable of mappings providing 'Stance', 'Headline' and
            'Body ID'.
        dataset: object whose ``articles`` maps a body ID to its article.
        name: string spliced into the ``features/<family>.<name>.npy`` paths.
        bow_vectorizer, tfreq_vectorizer, tfidf_vectorizer: forwarded
            unchanged to ``gen_tf_idf_feats``.

    Returns:
        ``(X, y)`` where ``X`` is the ``np.c_`` concatenation of the hand,
        polarity, headline-refuting, overlap and TF-cosine features, and ``y``
        is the list of ``LABELS`` indices.
    """
    headlines, bodies, labels = [], [], []
    for entry in stances:
        labels.append(LABELS.index(entry['Stance']))
        headlines.append(entry['Headline'])
        bodies.append(dataset.articles[entry['Body ID']])

    suffix = name + ".npy"

    overlap = gen_or_load_feats(word_overlap_features, headlines, bodies,
                                "features/overlap." + suffix)

    # refuting_features is called directly here and yields a headline part and
    # a body part; only the headline part is stacked into X below.
    refuting_head, _refuting_body = refuting_features(headlines, bodies)

    polarity = gen_or_load_feats(polarity_features, headlines, bodies,
                                 "features/polarity." + suffix)
    hand = gen_or_load_feats(hand_features, headlines, bodies,
                             "features/hand." + suffix)

    # Headline sentiment, body sentiment and their cosine similarity are
    # computed but none of them is part of X at the moment.
    _senti_head, _senti_body, _senti_cos = sentiment_features(headlines, bodies)

    # TF cosine and TF-IDF cosine between headline and body; only the TF
    # cosine is stacked into X.
    tf_cos, _tf_idf_cos = gen_tf_idf_feats(stances, dataset.articles,
                                           bow_vectorizer, tfreq_vectorizer,
                                           tfidf_vectorizer)

    return np.c_[hand, polarity, refuting_head, overlap, tf_cos], labels
def generate_features(stances, dataset, name, number_of_words=5000):
    """Build baseline plus bag-of-words features for ``stances``.

    Args:
        stances: iterable of mappings providing 'Stance', 'Headline' and
            'Body ID'.
        dataset: object whose ``articles`` maps a body ID to its article.
        name: string spliced into the feature paths; the value "vocab"
            skips feature assembly entirely.
        number_of_words: forwarded to ``vocabularyForm``.

    Returns:
        ``(X, y)`` for any ``name`` other than "vocab"; ``None`` otherwise.
    """
    headlines, bodies, labels = [], [], []
    for entry in stances:
        labels.append(LABELS.index(entry['Stance']))
        headlines.append(entry['Headline'])
        bodies.append(dataset.articles[entry['Body ID']])

    vocab = vocabularyForm(headlines, bodies, number_of_words)

    if name == "vocab":
        # No matrix is assembled for the "vocab" pass.
        return None

    def cached(feature_fn, prefix):
        return gen_or_load_feats(feature_fn, headlines, bodies,
                                 prefix + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")
    bow = calculateBOW(headlines, bodies, vocab)

    return np.c_[hand, polarity, refuting, overlap, bow], labels
Exemplo n.º 3
0
def generate_features(stances, dataset, name):
    """Build the feature matrix and labels, printing start/finish timestamps.

    Args:
        stances: iterable of mappings providing 'Stance', 'Headline' and
            'Body ID'.
        dataset: object whose ``articles`` maps a body ID to its article.
        name: string spliced into the ``features/<family>.<name>.npy`` paths.

    Returns:
        ``(X, y)`` where ``X`` is the ``np.c_`` concatenation of the hand,
        polarity, refuting, overlap and sentiment features, and ``y`` is the
        list of ``LABELS`` indices.
    """
    print('* GENERATING FEATURES *')
    print(datetime.now())

    headlines, bodies, labels = [], [], []
    for item in stances:
        labels.append(LABELS.index(item['Stance']))
        headlines.append(item['Headline'])
        bodies.append(dataset.articles[item['Body ID']])

    tail = name + ".npy"
    overlap = gen_or_load_feats(word_overlap_features, headlines, bodies,
                                "features/overlap." + tail)
    refuting = gen_or_load_feats(refuting_features, headlines, bodies,
                                 "features/refuting." + tail)
    polarity = gen_or_load_feats(polarity_features, headlines, bodies,
                                 "features/polarity." + tail)
    hand = gen_or_load_feats(hand_features, headlines, bodies,
                             "features/hand." + tail)
    sentiment = gen_or_load_feats(sentiment_features, headlines, bodies,
                                  "features/sentiment." + tail)
    # NOTE: a tfidf feature family is currently disabled.

    features = np.c_[hand, polarity, refuting, overlap, sentiment]

    print('* FINISHED GENERATING FEATURES *')
    print(datetime.now())

    return features, labels
Exemplo n.º 4
0
def generate_baseline_features(stances, dataset, name, binary=True):
    """Build the baseline feature matrix and (optionally binarised) labels.

    Args:
        stances: iterable of mappings providing 'Stance', 'Headline' and
            'Body ID'.
        dataset: object whose ``articles`` maps a body ID to its article.
        name: string spliced into the ``../baseline/features/*.npy`` paths.
        binary: when truthy, each label becomes 0 if its ``LABELS`` index is
            below 3 and 1 otherwise; when falsy the raw ``LABELS`` index is
            kept.

    Returns:
        ``(X, y)`` where ``X`` is the ``np.c_`` concatenation of the hand,
        polarity, refuting and overlap features and ``y`` is the label list.
    """
    h, b, y = [], [], []
    baseline_dir = '../baseline/'
    for stance in stances:
        label = LABELS.index(stance['Stance'])
        # Test the flag by truthiness rather than comparing with ``True`` so
        # any truthy value selects the binary labelling.
        if binary:
            # NOTE(review): index 3 is presumably the "unrelated" class --
            # confirm against the definition of LABELS.
            label = 0 if label < 3 else 1
        y.append(label)

        h.append(stance['Headline'])
        b.append(dataset.articles[stance['Body ID']])

    X_overlap = gen_or_load_feats(
        word_overlap_features, h, b,
        baseline_dir + "features/overlap." + name + ".npy")
    X_refuting = gen_or_load_feats(
        refuting_features, h, b,
        baseline_dir + "features/refuting." + name + ".npy")
    X_polarity = gen_or_load_feats(
        polarity_features, h, b,
        baseline_dir + "features/polarity." + name + ".npy")
    X_hand = gen_or_load_feats(hand_features, h, b,
                               baseline_dir + "features/hand." + name + ".npy")
    X = np.c_[X_hand, X_polarity, X_refuting, X_overlap]
    return X, y
Exemplo n.º 5
0
def generate_features_keywords_with_IDs(stances, dataset, name, mode):
    """Build baseline features plus keyword features that also see body IDs.

    Args:
        stances: iterable of mappings providing 'Stance', 'Headline' and
            'Body ID'.
        dataset: object whose ``articles`` maps a body ID to its article.
        name: string spliced into the ``features/*.npy`` paths.
        mode: 'train' selects ``keywords_features_train``; any other value
            selects ``keywords_features_competition``.

    Returns:
        ``(X, y)`` where ``X`` is the ``np.c_`` concatenation of the hand,
        polarity, refuting, overlap and keyword features and ``y`` is the
        list of ``LABELS`` indices.
    """
    headlines, bodies, labels, body_ids = [], [], [], []
    for entry in stances:
        labels.append(LABELS.index(entry['Stance']))
        headlines.append(entry['Headline'])
        bodies.append(dataset.articles[entry['Body ID']])
        body_ids.append(entry['Body ID'])

    def cached(feature_fn, prefix):
        return gen_or_load_feats(feature_fn, headlines, bodies,
                                 prefix + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features_NLTK, "features/polarity_NLTK_full.")
    hand = cached(hand_features, "features/hand.")

    # Both modes use the same keywords path; only the feature function differs.
    if mode == 'train':
        keyword_fn = keywords_features_train
    else:
        keyword_fn = keywords_features_competition
    keywords = gen_or_load_feats_with_IDs(
        keyword_fn, headlines, bodies, body_ids,
        "features/keywords_." + name + ".npy")

    return np.c_[hand, polarity, refuting, overlap, keywords], labels
Exemplo n.º 6
0
def generate_features(stances, dataset, name, filters=False):
    """Build the baseline feature matrix with optional label collapsing.

    Args:
        stances: iterable of mappings providing 'Stance', 'Headline' and
            'Body ID'.
        dataset: object whose ``articles`` maps a body ID to its article.
        name: string spliced into the ``features/<family>.<name>.npy`` paths.
        filters: when truthy, a label is 0 if its ``LABELS`` index equals 3
            and 1 otherwise; when falsy the raw index is used.

    Returns:
        ``(X, y)`` where ``X`` is the ``np.c_`` concatenation of the refuting,
        polarity, hand and overlap features and ``y`` is the label list.
    """
    headlines, bodies, labels = [], [], []
    for entry in stances:
        index = LABELS.index(entry['Stance'])
        if filters:
            index = 0 if index == 3 else 1
        labels.append(index)
        headlines.append(entry['Headline'])
        bodies.append(dataset.articles[entry['Body ID']])

    tail = name + ".npy"
    overlap = gen_or_load_feats(word_overlap_features, headlines, bodies,
                                "features/overlap." + tail)
    refuting = gen_or_load_feats(refuting_features, headlines, bodies,
                                 "features/refuting." + tail)
    polarity = gen_or_load_feats(polarity_features, headlines, bodies,
                                 "features/polarity." + tail)
    hand = gen_or_load_feats(hand_features, headlines, bodies,
                             "features/hand." + tail)
    # NOTE: sentiment, cosine-tfidf and bleu feature families are currently
    # disabled.

    return np.c_[refuting, polarity, hand, overlap], labels
Exemplo n.º 7
0
def generate_features(stances, dataset, name):
    """Build the stage-1 feature matrix from headlines, bodies and their keys.

    Args:
        stances: iterable of mappings providing 'Stance', 'Headline',
            'Body ID' and 'Key'.
        dataset: object whose ``articles`` and ``keys`` are both indexed by
            body ID.
        name: string spliced into the ``.npy`` feature paths.

    Returns:
        ``(X, y)`` where ``X`` is the ``np.c_`` concatenation of the hand,
        feature-similarity, overlap, score and word-vector-similarity
        features and ``y`` is the list of ``LABELS`` indices.
    """
    headlines, bodies, head_keys, body_keys, labels = [], [], [], [], []
    for entry in stances:
        labels.append(LABELS.index(entry['Stance']))
        headlines.append(entry['Headline'])
        bodies.append(dataset.articles[entry['Body ID']])
        head_keys.append(entry['Key'])
        body_keys.append(dataset.keys[entry['Body ID']])

    def cached(feature_fn, prefix):
        return gen_or_load_feats(feature_fn, headlines, bodies, head_keys,
                                 body_keys, prefix + name + ".npy")

    # NOTE(review): the overlap path has no "stage1/" component, unlike the
    # other paths below -- kept exactly as it was.
    overlap = cached(word_overlap_features, "your--path/features/overlap.")
    # NOTE: refuting and polarity features are currently disabled.
    hand = cached(hand_features, "your--path/stage1/features/hand.")
    score = cached(score_feature, "your--path/stage1/features/score.")
    wv_sim = cached(word_vec_sim, "your--path/stage1/features/wv_sim.")
    feat_sim = cached(features_sim, "your--path/stage1/features/feat_sim.")

    return np.c_[hand, feat_sim, overlap, score, wv_sim], labels
Exemplo n.º 8
0
def generate_features_all(stances, dataset, name, repl):
    """Build overlap + hand features for every article (first pass).

    ``init_features(stances, dataset, repl)`` supplies the ids, headlines,
    bodies and labels. Returns ``(X, y, ids)`` where ``X`` is the ``np.c_``
    concatenation of the overlap and hand features.
    """
    ids, headlines, bodies, labels = init_features(stances, dataset, repl)

    tail = name + ".npy"
    overlap = gen_or_load_feats(word_overlap_features, headlines, bodies,
                                "features/overlap." + tail)
    hand = gen_or_load_feats(hand_features, headlines, bodies,
                             "features/hand." + tail)

    return np.c_[overlap, hand], labels, ids
Exemplo n.º 9
0
def generate_features_biased(stances, dataset, name, repl):
    """Build polarity + refuting + hand features for biased articles (third pass).

    ``init_features(stances, dataset, repl)`` supplies the ids, headlines,
    bodies and labels. Returns ``(X, y, ids)`` where ``X`` is the ``np.c_``
    concatenation of the polarity, refuting and hand features.
    """
    ids, headlines, bodies, labels = init_features(stances, dataset, repl)

    def cached(feature_fn, prefix):
        return gen_or_load_feats(feature_fn, headlines, bodies,
                                 prefix + name + ".npy")

    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")

    return np.c_[polarity, refuting, hand], labels, ids
Exemplo n.º 10
0
def generate_features_related(stances, dataset, name, repl):
    """Build features for related articles (second pass).

    ``init_features(stances, dataset, repl)`` supplies the ids, headlines,
    bodies and labels. Returns ``(X, y, ids)`` where ``X`` is the ``np.c_``
    concatenation of the polarity, refuting, hand, naive-bayes and format
    features.
    """
    ids, headlines, bodies, labels = init_features(stances, dataset, repl)

    tail = name + ".npy"
    bayes = gen_or_load_feats(naive_bayes_features, headlines, bodies,
                              "features/bayes." + tail)
    refuting = gen_or_load_feats(refuting_features, headlines, bodies,
                                 "features/refuting." + tail)
    polarity = gen_or_load_feats(polarity_features, headlines, bodies,
                                 "features/polarity." + tail)
    hand = gen_or_load_feats(hand_features, headlines, bodies,
                             "features/hand." + tail)
    fmt = gen_or_load_feats(format_features, headlines, bodies,
                            "features/format." + tail)

    return np.c_[polarity, refuting, hand, bayes, fmt], labels, ids
Exemplo n.º 11
0
def generate_features_related(stances, dataset, name):
    """Build polarity + refuting + hand features for related articles (second pass).

    ``init_features`` is called with the fixed mapping
    ``{'agree': 'disagree'}`` and supplies the ids, headlines, bodies and
    labels. Returns ``(X, y, ids)``.
    """
    ids, headlines, bodies, labels = init_features(stances, dataset,
                                                   {'agree': 'disagree'})

    def cached(feature_fn, prefix):
        return gen_or_load_feats(feature_fn, headlines, bodies,
                                 prefix + name + ".npy")

    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")

    return np.c_[polarity, refuting, hand], labels, ids
Exemplo n.º 12
0
def generate_features(stances, dataset, name):
    """Build the extended feature matrix (sentiment, NER, question mark, doc2vec).

    Args:
        stances: iterable of mappings providing 'Stance', 'Headline' and
            'Body ID'.
        dataset: object whose ``articles`` maps a body ID to its article.
        name: string spliced into the ``features/<family>.<name>.npy`` paths.

    Returns:
        ``(X, y)`` where ``X`` is the ``np.c_`` concatenation of the hand,
        sentiment, polarity, refuting, overlap, NER, Q and doc2vec features
        and ``y`` is the list of ``LABELS`` indices.
    """
    headlines, bodies, labels = [], [], []
    for entry in stances:
        labels.append(LABELS.index(entry['Stance']))
        headlines.append(entry['Headline'])
        bodies.append(dataset.articles[entry['Body ID']])

    def cached(feature_fn, prefix):
        return gen_or_load_feats(feature_fn, headlines, bodies,
                                 prefix + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    sentiment = cached(sentiment_analyzer, "features/sentiment.")
    hand = cached(hand_features, "features/hand.")
    ner = cached(name_entity_similarity, "features/ner.")
    question = cached(question_mark_ending, "features/Q.")
    doc2vec = cached(doc2vec_feature, "features/doc2vec.")

    stacked = np.c_[hand, sentiment, polarity, refuting, overlap, ner,
                    question, doc2vec]
    return stacked, labels
Exemplo n.º 13
0
def generate_features(stances, dataset, name):
    """Build the four-family baseline feature matrix for ``stances``.

    Returns ``(X, y)``: ``X`` is the ``np.c_`` concatenation of the hand,
    polarity, refuting and overlap features; ``y`` holds the ``LABELS`` index
    of every stance.
    """
    headlines, bodies, labels = [], [], []
    for entry in stances:
        labels.append(LABELS.index(entry['Stance']))
        headlines.append(entry['Headline'])
        bodies.append(dataset.articles[entry['Body ID']])

    tail = name + ".npy"
    overlap = gen_or_load_feats(word_overlap_features, headlines, bodies,
                                "features/overlap." + tail)
    refuting = gen_or_load_feats(refuting_features, headlines, bodies,
                                 "features/refuting." + tail)
    polarity = gen_or_load_feats(polarity_features, headlines, bodies,
                                 "features/polarity." + tail)
    hand = gen_or_load_feats(hand_features, headlines, bodies,
                             "features/hand." + tail)

    return np.c_[hand, polarity, refuting, overlap], labels
Exemplo n.º 14
0
def generate_features(stances, dataset, name):
    """Return ``(X, y)`` for ``stances`` using the baseline feature families.

    Args:
        stances: iterable of mappings providing 'Stance', 'Headline' and
            'Body ID'.
        dataset: object whose ``articles`` maps a body ID to its article.
        name: string spliced into the ``features/<family>.<name>.npy`` paths.

    Returns:
        ``X`` as the ``np.c_`` concatenation of hand, polarity, refuting and
        overlap features, and ``y`` as the list of ``LABELS`` indices.
    """
    targets, heads, texts = [], [], []
    for record in stances:
        targets.append(LABELS.index(record['Stance']))
        heads.append(record['Headline'])
        texts.append(dataset.articles[record['Body ID']])

    def cached(feature_fn, prefix):
        return gen_or_load_feats(feature_fn, heads, texts,
                                 prefix + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")

    matrix = np.c_[hand, polarity, refuting, overlap]
    return matrix, targets
Exemplo n.º 15
0
def generate_features(stances, dataset, name):
    """Build hand, overlap, tf-idf and sentiment features for ``stances``.

    Returns ``(X, y)``: ``X`` is the ``np.c_`` concatenation of the hand,
    overlap, tf-idf and sentiment features (in that order); ``y`` holds the
    ``LABELS`` index of every stance.
    """
    headlines, bodies, labels = [], [], []
    for entry in stances:
        labels.append(LABELS.index(entry['Stance']))
        headlines.append(entry['Headline'])
        bodies.append(dataset.articles[entry['Body ID']])

    tail = name + ".npy"
    overlap = gen_or_load_feats(word_overlap_features, headlines, bodies,
                                "features/overlap." + tail)
    hand = gen_or_load_feats(hand_features, headlines, bodies,
                             "features/hand." + tail)
    tf_idf = gen_or_load_feats(tf_idf_features, headlines, bodies,
                               "features/tf_idf." + tail)
    sentiment = gen_or_load_feats(sentiment_features, headlines, bodies,
                                  "features/sentiment." + tail)

    return np.c_[hand, overlap, tf_idf, sentiment], labels
Exemplo n.º 16
0
def generate_features(dataset):
    """Build the baseline feature matrix for every stance in ``dataset``.

    Headlines come from ``dataset.stances`` and bodies from
    ``dataset.articles`` (looked up by 'Body ID'). The feature paths are
    fixed (no name component). Returns the ``np.c_`` concatenation of the
    refuting, polarity, hand and overlap features; no labels are produced.
    """
    headlines, bodies = [], []
    for entry in dataset.stances:
        headlines.append(entry['Headline'])
        bodies.append(dataset.articles[entry['Body ID']])

    def cached(feature_fn, path):
        return gen_or_load_feats(feature_fn, headlines, bodies, path)

    overlap = cached(word_overlap_features, "features/overlap.npy")
    refuting = cached(refuting_features, "features/refuting.npy")
    polarity = cached(polarity_features, "features/polarity.npy")
    hand = cached(hand_features, "features/hand.npy")

    return np.c_[refuting, polarity, hand, overlap]
Exemplo n.º 17
0
def generate_features(stances, dataset, name):
    """Build the extended overlap/tf-idf feature matrix for ``stances``.

    Labels are indices into ``LABELS_RELATED`` when ``params.run_2_class`` is
    truthy and ``name`` is not 'competition'; otherwise they are indices into
    ``LABELS``.

    Args:
        stances: iterable of mappings providing 'Stance', 'Headline' and
            'Body ID'.
        dataset: object whose ``articles`` maps a body ID to its article.
        name: string spliced into the ``features/*.npy`` paths.

    Returns:
        ``(X, y)`` where ``X`` is the ``np.c_`` concatenation of ten feature
        families (hand, polarity, refuting, overlap, quote overlap, POS
        overlap, split-body overlap, tf-idf, tf-idf max, BPE overlap).
    """
    headlines, bodies, labels = [], [], []
    for entry in stances:
        if params.run_2_class and name != 'competition':
            label_set = LABELS_RELATED
        else:
            label_set = LABELS
        labels.append(label_set.index(entry['Stance']))

        headlines.append(entry['Headline'])
        bodies.append(dataset.articles[entry['Body ID']])

    def cached(feature_fn, prefix):
        return gen_or_load_feats(feature_fn, headlines, bodies,
                                 prefix + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")
    overlap_quotes = cached(word_overlap_quotes_features,
                            "features/overlap_quotes.")
    overlap_pos = cached(word_overlap_pos_features, "features/overlap_pos.")
    overlap_pos_sentence = cached(
        word_overlap_split_bodies_features,
        "features/overlap_pos_sentence_split_bodies.")
    tfidf = cached(word_tfidf_features, "features/tfidf_pos.")
    tfidf_max = cached(word_tfidf_pos_ss_features, "features/tfidf_pos_max.")
    overlap_bpe = cached(word_overlap_bpe_features,
                         "features/overlap_bpe_nltk_tag3.")

    stacked = np.c_[hand, polarity, refuting, overlap, overlap_quotes,
                    overlap_pos, overlap_pos_sentence, tfidf, tfidf_max,
                    overlap_bpe]
    return stacked, labels
Exemplo n.º 18
0
def generate_features_nn(dataset):
    """Return the GloVe features for every stance in ``dataset``.

    Headlines come from ``dataset.stances`` and bodies from
    ``dataset.articles`` (looked up by 'Body ID'); the result of
    ``gen_or_load_feats`` for ``glove_features`` is returned as-is.
    """
    headlines, bodies = [], []
    for entry in dataset.stances:
        headlines.append(entry['Headline'])
        bodies.append(dataset.articles[entry['Body ID']])

    return gen_or_load_feats(glove_features, headlines, bodies,
                             "features/glove_features.npy")
Exemplo n.º 19
0
def generate_test_features(stances, dataset, name):
    """Build the label-free feature matrix for test stances.

    Bodies are read from ``dataset.body`` (indexed by 'Body ID'). Returns the
    ``np.c_`` concatenation of the hand, polarity, refuting, overlap and LDA
    features.
    """
    headlines, bodies = [], []
    for entry in stances:
        headlines.append(entry['Headline'])
        bodies.append(dataset.body[entry['Body ID']])

    tail = name + ".npy"
    # NOTE(review): unlike the other paths, there is no '.' between "lda" and
    # the name -- kept exactly as it was.
    lda = gen_or_load_feats(lda_features, headlines, bodies,
                            "features/lda" + tail)
    overlap = gen_or_load_feats(word_overlap_features, headlines, bodies,
                                "features/overlap." + tail)
    refuting = gen_or_load_feats(refuting_features, headlines, bodies,
                                 "features/refuting." + tail)
    polarity = gen_or_load_feats(polarity_features, headlines, bodies,
                                 "features/polarity." + tail)
    hand = gen_or_load_feats(hand_features, headlines, bodies,
                             "features/hand." + tail)

    return np.c_[hand, polarity, refuting, overlap, lda]
Exemplo n.º 20
0
def generate_baseline_test_features(stances, dataset, name):
    """Build the label-free baseline feature matrix for test stances.

    Feature paths live under ``../baseline/features/`` and end in
    ``<name>_.npy``. Returns the ``np.c_`` concatenation of the hand,
    polarity, refuting and overlap features.
    """
    baseline_dir = '../baseline/'
    headlines, bodies = [], []
    for entry in stances:
        headlines.append(entry['Headline'])
        bodies.append(dataset.articles[entry['Body ID']])

    def cached(feature_fn, prefix):
        return gen_or_load_feats(feature_fn, headlines, bodies,
                                 baseline_dir + prefix + name + "_.npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")

    return np.c_[hand, polarity, refuting, overlap]
def generate_features(stances, dataset, name):
    """Build the combined hand-crafted + BERT + cosine feature matrix.

    Args:
        stances: iterable of mappings providing 'Stance', 'Headline' and
            'Body ID'.
        dataset: object whose ``articles`` maps a body ID to its article.
        name: string spliced into the feature paths.

    Returns:
        ``(X, y)`` where ``X`` is the ``np.c_`` concatenation of the refuting,
        overlap, hand, sentiment, NER, polarity, BERT and cosine features and
        ``y`` is the list of ``LABELS`` indices.
    """
    headlines, bodies, labels = [], [], []
    for entry in stances:
        labels.append(LABELS.index(entry['Stance']))
        headlines.append(entry['Headline'])
        bodies.append(dataset.articles[entry['Body ID']])

    def cached(feature_fn, prefix):
        return gen_or_load_feats(feature_fn, headlines, bodies,
                                 prefix + name + ".npy")

    sentiment = cached(sentiment_features, "features/sentiment.")
    ner = cached(ner_features, "features/ner.")
    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")
    # The BERT and cosine features do not go through gen_or_load_feats; each
    # helper is handed its own path (and, for cosine, the name).
    bert = bert_features("features/combined_bert_" + name + ".csv")
    cosine = cosine_features(name, "features/cosine." + name + ".npy")

    stacked = np.c_[refuting, overlap, hand, sentiment, ner, polarity,
                    bert, cosine]
    return stacked, labels
Exemplo n.º 22
0
def generate_features(stances, dataset, name):
    """Build baseline plus bag-of-words vector features, reporting progress.

    Prints the ``name`` being processed before starting and the number of
    feature columns (``X.shape[1]``) when done.

    Returns:
        ``(X, y)`` where ``X`` is the ``np.c_`` concatenation of the hand,
        polarity, refuting, overlap, averaged-BoW-vector and BoW-count
        features and ``y`` is the list of ``LABELS`` indices.
    """
    print("Generating Features for :", name)

    headlines, bodies, labels = [], [], []
    for entry in stances:
        labels.append(LABELS.index(entry['Stance']))
        headlines.append(entry['Headline'])
        bodies.append(dataset.articles[entry['Body ID']])

    tail = name + ".npy"
    overlap = gen_or_load_feats(word_overlap_features, headlines, bodies,
                                "features/overlap." + tail)
    refuting = gen_or_load_feats(refuting_features, headlines, bodies,
                                 "features/refuting." + tail)
    polarity = gen_or_load_feats(polarity_features, headlines, bodies,
                                 "features/polarity." + tail)
    hand = gen_or_load_feats(hand_features, headlines, bodies,
                             "features/hand." + tail)

    # The two BoW families are wrapped in np.array before stacking.
    bow_vectors = gen_or_load_feats(bow_averaged_vectors, headlines, bodies,
                                    "features/bowvec_200dnorm." + tail)
    bow_vectors = np.array(bow_vectors)
    bow_counts = gen_or_load_feats(bow_count_vectors, headlines, bodies,
                                   "features/bowcount_1000." + tail)
    bow_counts = np.array(bow_counts)

    features = np.c_[hand, polarity, refuting, overlap, bow_vectors,
                     bow_counts]
    print("... Done. Features :", features.shape[1])
    return features, labels
Exemplo n.º 23
0
def generate_features(headlines, bodies, name, possibility, bow_vectorizer,
                      tfreq_vectorizer, tfidf_vectorizer):
    """Stack the feature families selected by the ``possibility`` flags.

    All four families are always requested from ``gen_or_load_feats`` -- even
    the ones a given ``possibility`` does not select -- so they are saved for
    later possibilities.

    Args:
        headlines, bodies: passed straight to ``gen_or_load_feats``.
        name: string spliced into the feature paths.
        possibility: indexable of at least four items; position 0..3 equal to
            '1' selects overlap, refuting, grammar dependencies and tf-idf
            respectively.
        bow_vectorizer, tfreq_vectorizer, tfidf_vectorizer: forwarded as extra
            arguments for the tf-idf features.

    Returns:
        ``(X, verbos)``: the result of ``stackFeatures`` on the selected
        families and a human-readable description of the selection.
    """
    tail = name + ".npy"
    overlap = gen_or_load_feats(word_overlap_features, headlines, bodies,
                                "features/overlap." + tail)
    refuting = gen_or_load_feats(refuting_features, headlines, bodies,
                                 "features/refuting." + tail)
    # NOTE(review): no '.' between "grammar" and the name -- kept as it was.
    grammar = gen_or_load_feats(grammar_dependencies_count, headlines, bodies,
                                "features/grammar" + tail)
    tf_idf = gen_or_load_feats(tfIdf_features, headlines, bodies,
                               "features/tfidf." + tail,
                               bow_vectorizer, tfreq_vectorizer,
                               tfidf_vectorizer)

    # Position in this tuple matches the position of the flag in possibility.
    candidates = (
        (overlap, " * X_Overlap"),
        (refuting, " * X_refuting"),
        (grammar, " * X_grammar_dependencies"),
        (tf_idf, " * X_tf_idf"),
    )
    chosen, tags = [], []
    for position, (matrix, tag) in enumerate(candidates):
        if possibility[position] == '1':
            chosen.append(matrix)
            tags.append(tag)

    description = "Test with the following Features : " + "".join(tags)
    # Stack the selected families into one long feature table.
    stacked = stackFeatures(chosen)
    return stacked, description
Exemplo n.º 24
0
def generate_features(stances, dataset, name, number_of_words=5000):
    """Build baseline, bag-of-words and InferSent headline-embedding features.

    Args:
        stances: iterable of mappings providing 'Stance', 'Headline' and
            'Body ID'.
        dataset: object whose ``articles`` maps a body ID to its article.
        name: string spliced into the feature paths; the value "vocab"
            skips feature assembly entirely.
        number_of_words: forwarded to ``vocabularyForm``.

    Returns:
        ``(X, y)`` for any ``name`` other than "vocab", where ``X`` is the
        ``np.c_`` concatenation of the hand, polarity, refuting, overlap,
        bag-of-words and embedding features; ``None`` when ``name`` is
        "vocab".
    """
    h, b, y = [], [], []

    for stance in stances:
        y.append(LABELS.index(stance['Stance']))
        h.append(stance['Headline'])
        b.append(dataset.articles[stance['Body ID']])

    vocabulary = vocabularyForm(h, b, number_of_words)

    if name == "vocab":
        # No matrix is assembled for the "vocab" pass; return None explicitly
        # so the two exit paths are visible to the reader.
        return None

    X_overlap = gen_or_load_feats(word_overlap_features, h, b,
                                  "features/overlap." + name + ".npy")
    X_refuting = gen_or_load_feats(refuting_features, h, b,
                                   "features/refuting." + name + ".npy")
    X_polarity = gen_or_load_feats(polarity_features, h, b,
                                   "features/polarity." + name + ".npy")
    X_hand = gen_or_load_feats(hand_features, h, b,
                               "features/hand." + name + ".npy")
    X_Bow = calculateBOW(h, b, vocabulary)

    # SECURITY NOTE: torch.load unpickles the file, which can execute
    # arbitrary code -- only ever point this at a trusted model file.
    infersent = torch.load('infersent.allnli.pickle',
                           map_location=lambda storage, loc: storage)
    infersent.set_glove_path('dataset/glove.840B.300d.txt')

    # Only the headlines are embedded (h and b always have equal length, so
    # this is the same list the previous zip-based loop produced).
    # NOTE(review): bodies are not encoded -- confirm that is intended.
    sentences = list(h)

    infersent.build_vocab(sentences, tokenize=True)
    X_embed = infersent.encode(sentences, tokenize=True)

    X = np.c_[X_hand, X_polarity, X_refuting, X_overlap, X_Bow, X_embed]

    return X, y
Exemplo n.º 25
0
def generate_features_nn(stances, dataset, name, filters=False):
    """Return GloVe features and one-hot labels for ``stances``.

    Args:
        stances: iterable of mappings providing 'Stance', 'Headline' and
            'Body ID'.
        dataset: object whose ``articles`` maps a body ID to its article.
        name: string spliced into the GloVe feature path.
        filters: when truthy, stances whose 'Stance' is 'unrelated' are
            dropped.

    Returns:
        ``(X_glove, y)`` where ``y`` is ``np.asarray`` of the
        ``LABELS_ONE_HOT`` entries of the kept stances.
    """
    headlines, bodies, targets = [], [], []
    for entry in stances:
        if filters and entry['Stance'] == 'unrelated':
            continue
        targets.append(LABELS_ONE_HOT[entry['Stance']])
        headlines.append(entry['Headline'])
        bodies.append(dataset.articles[entry['Body ID']])

    targets = np.asarray(targets)

    glove = gen_or_load_feats(glove_features, headlines, bodies,
                              "features/glove_features." + name + ".npy")
    return glove, targets
Exemplo n.º 26
0
def generate_features(stances, dataset, name, only_related=False):
    """Build the extended feature matrix plus multi-class and binary labels.

    Args:
        stances: iterable of mappings providing 'Stance', 'Stance_biClass',
            'Headline' and 'Body ID'.
        dataset: object whose ``articles`` maps a body ID to its article.
        name: string spliced into the feature paths.
        only_related: when truthy, every feature file name is prefixed with
            "re_" so it does not share a path with the unfiltered features.

    Returns:
        ``(X, y, y_bi)`` where ``X`` is the ``np.c_`` concatenation of the
        hand, sentiment, polarity, refuting, overlap, NER, Q and doc2vec
        features, ``y`` is the list of ``LABELS`` indices and ``y_bi`` is the
        list of 'Stance_biClass' values.
    """
    h, b, y, y_bi = [], [], [], []

    related_dir = "re_" if only_related else ""
    for stance in stances:
        y_bi.append(stance['Stance_biClass'])
        y.append(LABELS.index(stance['Stance']))
        h.append(stance['Headline'])
        b.append(dataset.articles[stance['Body ID']])

    X_overlap = gen_or_load_feats(
        word_overlap_features, h, b,
        "features/" + related_dir + "overlap." + name + ".npy")
    X_refuting = gen_or_load_feats(
        refuting_features, h, b,
        "features/" + related_dir + "refuting." + name + ".npy")
    X_polarity = gen_or_load_feats(
        polarity_features, h, b,
        "features/" + related_dir + "polarity." + name + ".npy")
    X_sentiment = gen_or_load_feats(
        sentiment_analyzer, h, b,
        "features/" + related_dir + "sentiment." + name + ".npy")
    X_hand = gen_or_load_feats(
        hand_features, h, b,
        "features/" + related_dir + "hand." + name + ".npy")
    X_ner = gen_or_load_feats(
        name_entity_similarity, h, b,
        "features/" + related_dir + "ner." + name + ".npy")
    # The Q features come from question_mark_ending. They used to be computed
    # with name_entity_similarity (a copy-paste slip), which made the Q
    # columns a duplicate of the NER columns.
    # NOTE: any existing "Q.*.npy" files hold the old, wrong values and
    # should be deleted so they are regenerated.
    X_Q = gen_or_load_feats(question_mark_ending, h, b,
                            "features/" + related_dir + "Q." + name + ".npy")
    X_doc2vec = gen_or_load_feats(
        doc2vec_feature, h, b,
        "features/" + related_dir + "doc2vec." + name + ".npy")

    X = np.c_[X_hand, X_sentiment, X_polarity, X_refuting, X_overlap, X_ner,
              X_Q, X_doc2vec]
    return X, y, y_bi
Exemplo n.º 27
0
def generate_features(stances, dataset, name):
    """Build baseline plus tf-idf, SVD and sentiment features for ``stances``.

    Args:
        stances: iterable of mappings providing 'Stance', 'Headline' and
            'Body ID'.
        dataset: object whose ``articles`` maps a body ID to its article.
        name: string spliced into the ``features/<family>.<name>.npy`` paths.

    Returns:
        ``(X, y)`` where ``X`` is the ``np.c_`` concatenation of the hand,
        polarity, refuting, overlap, tf-idf, SVD and sentiment features and
        ``y`` is the list of ``LABELS`` indices.
    """
    headlines, bodies, labels = [], [], []
    for entry in stances:
        labels.append(LABELS.index(entry['Stance']))
        headlines.append(entry['Headline'])
        bodies.append(dataset.articles[entry['Body ID']])

    def cached(feature_fn, prefix):
        return gen_or_load_feats(feature_fn, headlines, bodies,
                                 prefix + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")
    tf_idf = cached(tf_idf_features, "features/tf_idf.")
    svd = cached(svd_features, "features/svd.")
    sentiment = cached(sentiment_features, "features/sentiment.")

    stacked = np.c_[hand, polarity, refuting, overlap, tf_idf, svd,
                    sentiment]
    return stacked, labels
Exemplo n.º 28
0
def _fill_feature_frame(frame, y, h, b, X):
    """Copy labels, headlines, bodies and every column of ``X`` into ``frame``."""
    frame['stance'] = y
    frame['headline'] = h
    # NOTE(review): the 'body_id' column receives the article bodies, not the
    # body IDs -- kept as-is; confirm what downstream readers expect.
    frame['body_id'] = b
    for i in range(0, X.shape[1]):
        frame[i] = X[:, i]


def generate_features(stances, dataset, name):
    """Build baseline plus topic-model (NMF, LDA) features for ``stances``.

    As a side effect, when ``name`` is "competition" or "full" and the
    matching CSV file (``comp_feature_data.csv`` / ``train_feature_data.csv``)
    does not exist yet, the module-level ``comp_feature_data`` /
    ``train_feature_data`` object is filled with the labels, headlines,
    bodies and feature columns. No file is written here.

    Args:
        stances: iterable of mappings providing 'Stance', 'Headline' and
            'Body ID'.
        dataset: object whose ``articles`` maps a body ID to its article.
        name: string spliced into the ``features/*.npy`` paths.

    Returns:
        ``(X, y)`` where ``X`` is the ``np.c_`` concatenation of the hand,
        polarity, refuting, overlap, NMF and LDA features and ``y`` is the
        list of ``LABELS`` indices.
    """
    h, b, y = [], [], []
    # The per-stance ``rows`` list that used to be built here was never read;
    # it only duplicated every headline and body in memory, so it is gone.
    for stance in stances:
        y.append(LABELS.index(stance['Stance']))
        h.append(stance['Headline'])
        b.append(dataset.articles[stance['Body ID']])

    X_overlap = gen_or_load_feats(word_overlap_features, h, b,
                                  "features/overlap." + name + ".npy")
    X_refuting = gen_or_load_feats(refuting_features, h, b,
                                   "features/refuting." + name + ".npy")
    X_polarity = gen_or_load_feats(polarity_features, h, b,
                                   "features/polarity." + name + ".npy")
    X_hand = gen_or_load_feats(hand_features, h, b,
                               "features/hand." + name + ".npy")

    # Topic-modelling features.
    X_NMF = gen_or_load_feats(NMF_cos_50, h, b,
                              "features/nmf." + name + ".npy")
    X_LDA = gen_or_load_feats(LDA_cos_25, h, b,
                              "features/lda-25." + name + ".npy")

    X = np.c_[X_hand, X_polarity, X_refuting, X_overlap, X_NMF, X_LDA]

    if name == "competition":
        if not os.path.isfile('comp_feature_data.csv'):
            _fill_feature_frame(comp_feature_data, y, h, b, X)

    if name == "full":
        if not os.path.isfile('train_feature_data.csv'):
            _fill_feature_frame(train_feature_data, y, h, b, X)

    return X, y
Exemplo n.º 29
0
def generate_features(stances, dataset, name):
    """Assemble the feature matrix and label list for a set of stances.

    Seven feature blocks are requested from ``gen_or_load_feats`` (overlap,
    refuting, polarity, hand, tf-idf, svd, sentiment), each with the path
    ``features/<kind>.<name>.npy``, and concatenated column-wise.

    Args:
        stances: Iterable of mappings with 'Stance', 'Headline' and
            'Body ID' keys.
        dataset: Object whose ``articles`` maps a body id to the body text.
        name: Split name embedded in every feature file name.

    Returns:
        A tuple ``(X, y)``: the concatenated feature array and the list of
        ``LABELS`` indices, one per stance.
    """
    # Walk the stances exactly once, collecting (label, headline, body).
    samples = [(LABELS.index(entry['Stance']),
                entry['Headline'],
                dataset.articles[entry['Body ID']])
               for entry in stances]
    y = [label for label, _, _ in samples]
    h = [headline for _, headline, _ in samples]
    b = [body for _, _, body in samples]

    def extract(feature_fn, path_prefix):
        # Every feature block shares the headlines, bodies and name suffix.
        return gen_or_load_feats(feature_fn, h, b,
                                 path_prefix + name + ".npy")

    # The call order below is kept as-is; it differs from the column order.
    overlap = extract(word_overlap_features, "features/overlap.")
    refuting = extract(refuting_features, "features/refuting.")
    polarity = extract(polarity_features, "features/polarity.")
    hand = extract(hand_features, "features/hand.")
    tf_idf = extract(tf_idf_features, "features/tf_idf.")
    svd = extract(svd_features, "features/svd.")
    sentiment = extract(sentiment_features, "features/sentiment.")

    # Diagnostic matplotlib plots of each feature block sorted by label used
    # to live here as commented-out code; they had no effect on the result.
    X = np.c_[hand, polarity, refuting, overlap, tf_idf, svd, sentiment]
    return X, y
Exemplo n.º 30
0
def generate_features(stances, dataset, name):
    """Build features plus label/feature subsets for staged classification.

    The hand, polarity, refuting and word-overlap feature blocks are obtained
    through ``gen_or_load_feats`` (each with the path
    ``features/<kind>.<name>.npy``) and concatenated column-wise into ``X``.
    The labels are then re-cut into three binary problems.

    Args:
        stances: Iterable of mappings with 'Stance', 'Headline' and
            'Body ID' keys.
        dataset: Object whose ``articles`` maps a body id to the body text.
        name: Split name embedded in every feature file name.

    Returns:
        A tuple ``(X, y, yur, Xr, yr, yr_discussVsAd, Xad,
        yad_agreeVsdisagree)``:

        X: original feature array.
        y: original labels (``LABELS`` indices).
        yur: y for related/unrelated. Still the original size; 0 indicates
            related, 3 unrelated.
        Xr: list of the rows of X that belong to related pairs.
        yr: subset of y which only contains related pairs.
        yr_discussVsAd: same size as yr; 0 indicates agree or disagree,
            2 discuss.
        Xad: list of the rows of X that are agree or disagree.
        yad_agreeVsdisagree: same size as Xad; 0 for agree, 1 for disagree.

        To separate only related vs unrelated, use X and yur.
        To separate only discuss vs agree/disagree, use Xr and
        yr_discussVsAd.
        To separate only agree vs disagree, use Xad and yad_agreeVsdisagree.
    """
    h, b, y = [], [], []

    for stance in stances:
        y.append(LABELS.index(stance['Stance']))
        h.append(stance['Headline'])
        b.append(dataset.articles[stance['Body ID']])

    X_overlap = gen_or_load_feats(word_overlap_features, h, b,
                                  "features/overlap." + name + ".npy")
    X_refuting = gen_or_load_feats(refuting_features, h, b,
                                   "features/refuting." + name + ".npy")
    X_polarity = gen_or_load_feats(polarity_features, h, b,
                                   "features/polarity." + name + ".npy")
    X_hand = gen_or_load_feats(hand_features, h, b,
                               "features/hand." + name + ".npy")

    X = np.c_[X_hand, X_polarity, X_refuting, X_overlap]

    # Label indices this function treats specially (see Returns above).
    unrelated, discuss = 3, 2

    # Stage 1: related (0) vs unrelated (3), same length as y.
    yur = [unrelated if label == unrelated else 0 for label in y]

    # Stage 2: keep only related pairs. X is indexed by position (rather than
    # zipped with y) so a row-count mismatch raises instead of truncating.
    related = [i for i, label in enumerate(y) if label != unrelated]
    yr = [y[i] for i in related]
    Xr = [X[i] for i in related]
    yr_discussVsAd = [discuss if label == discuss else 0 for label in yr]

    # Stage 3: keep only agree/disagree pairs.
    agree_or_disagree = [i for i in related if y[i] != discuss]
    yad_agreeVsdisagree = [y[i] for i in agree_or_disagree]
    Xad = [X[i] for i in agree_or_disagree]

    return X, y, yur, Xr, yr, yr_discussVsAd, Xad, yad_agreeVsdisagree