Exemplo n.º 1
0
def generate_baseline_features(stances, dataset, name, binary=True):
    """Assemble the baseline feature matrix and labels for ``stances``.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries.
        dataset: object whose ``articles`` is indexed by body ID.
        name: tag inserted into every feature file name.
        binary: when it compares equal to True, labels are collapsed to
            0 (LABELS index below 3) or 1 (index 3 and above);
            otherwise the raw LABELS index is used.

    Returns:
        ``(X, y)``: ``X`` is the column-wise concatenation of the hand,
        polarity, refuting and overlap features (in that order) and
        ``y`` is the list of labels.
    """
    feature_root = '../baseline/'
    headlines, bodies, y = [], [], []
    for record in stances:
        label_idx = LABELS.index(record['Stance'])
        # Raw index unless ``binary`` equals True; then 0 for 0-2, else 1.
        y.append(label_idx if binary != True else int(label_idx >= 3))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    def cached(extractor, stem):
        # Every feature file lives under the baseline directory.
        return gen_or_load_feats(extractor, headlines, bodies,
                                 feature_root + stem + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")
    return np.c_[hand, polarity, refuting, overlap], y
Exemplo n.º 2
0
def generate_features(stances, dataset, name, model, mode, binary=False):
    """Vectorise every headline/body pair with ``buildWordVector``.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries.
        dataset: object whose ``articles`` is indexed by body ID.
        name: accepted for interface compatibility; not used here.
        model, mode: forwarded unchanged to ``buildWordVector``.
        binary: when it compares equal to True, labels are collapsed to
            0 (LABELS index below 3) or 1 (otherwise).

    Returns:
        ``(headline, body, y)``: two dicts with 'features' and 'lengths'
        lists (one entry per stance) and the list of labels.
    """
    headline = {'features': [], 'lengths': []}
    body = {'features': [], 'lengths': []}
    y = []

    for record in stances:
        label_idx = LABELS.index(record['Stance'])
        y.append(label_idx if binary != True else int(label_idx >= 3))

        # buildWordVector yields a (features, length) pair for each text.
        head_vec, head_len = buildWordVector(record['Headline'], model, mode)
        body_vec, body_len = buildWordVector(
            dataset.articles[record['Body ID']], model, mode)

        headline['features'].append(head_vec)
        headline['lengths'].append(head_len)
        body['features'].append(body_vec)
        body['lengths'].append(body_len)

    return headline, body, y
Exemplo n.º 3
0
def generate_features(stances, dataset, name, filters=False):
    """Build the feature matrix and labels, optionally as a 2-class task.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries.
        dataset: object whose ``articles`` is indexed by body ID.
        name: tag inserted into every feature file name.
        filters: when truthy, the label is 0 for LABELS index 3 and 1
            for every other index; otherwise the raw index is used.

    Returns:
        ``(X, y)``: ``X`` column-stacks the refuting, polarity, hand and
        overlap features (in that order); ``y`` is the label list.
    """
    headlines, bodies, y = [], [], []
    for record in stances:
        label_idx = LABELS.index(record['Stance'])
        y.append(int(label_idx != 3) if filters else label_idx)
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    # Sentiment, cosine TF-IDF and BLEU features are not part of this set.
    specs = (
        (word_overlap_features, "features/overlap."),
        (refuting_features, "features/refuting."),
        (polarity_features, "features/polarity."),
        (hand_features, "features/hand."),
    )
    overlap, refuting, polarity, hand = [
        gen_or_load_feats(extractor, headlines, bodies, stem + name + ".npy")
        for extractor, stem in specs
    ]

    return np.c_[refuting, polarity, hand, overlap], y
Exemplo n.º 4
0
def generate_features_keywords_with_IDs(stances, dataset, name, mode):
    """Build baseline features plus ID-aware keyword features.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries.
        dataset: object whose ``articles`` is indexed by body ID.
        name: tag inserted into every feature file name.
        mode: 'train' selects ``keywords_features_train``; any other
            value selects ``keywords_features_competition``.

    Returns:
        ``(X, y)``: ``X`` column-stacks hand, polarity, refuting,
        overlap and keyword features; ``y`` holds the LABELS indices.
    """
    records = [(LABELS.index(s['Stance']), s['Headline'],
                dataset.articles[s['Body ID']], s['Body ID'])
               for s in stances]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]
    body_ids = [rec[3] for rec in records]

    def cached(extractor, stem):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 stem + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features_NLTK, "features/polarity_NLTK_full.")
    hand = cached(hand_features, "features/hand.")

    # Both modes share the same keyword file name; only the extractor differs.
    keyword_extractor = (keywords_features_train if mode == 'train'
                         else keywords_features_competition)
    keywords = gen_or_load_feats_with_IDs(
        keyword_extractor, headlines, bodies, body_ids,
        "features/keywords_." + name + ".npy")

    return np.c_[hand, polarity, refuting, overlap, keywords], y
def generate_features(stances, dataset, name, number_of_words=5000):
    """Build baseline plus bag-of-words features for ``stances``.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries.
        dataset: object whose ``articles`` is indexed by body ID.
        name: tag inserted into every feature file name; the special
            value "vocab" stops after the vocabulary has been formed.
        number_of_words: forwarded to ``vocabularyForm``.

    Returns:
        ``(X, y)`` with ``X`` column-stacking hand, polarity, refuting,
        overlap and bag-of-words features, or ``None`` when ``name`` is
        "vocab".
    """
    records = [(LABELS.index(s['Stance']), s['Headline'],
                dataset.articles[s['Body ID']])
               for s in stances]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    vocabulary = vocabularyForm(headlines, bodies, number_of_words)

    if name == "vocab":
        # The "vocab" pass produces no feature matrix.
        return None

    overlap_path = "features/overlap." + name + ".npy"
    overlap = gen_or_load_feats(word_overlap_features, headlines, bodies,
                                overlap_path)
    refuting_path = "features/refuting." + name + ".npy"
    refuting = gen_or_load_feats(refuting_features, headlines, bodies,
                                 refuting_path)
    polarity_path = "features/polarity." + name + ".npy"
    polarity = gen_or_load_feats(polarity_features, headlines, bodies,
                                 polarity_path)
    hand_path = "features/hand." + name + ".npy"
    hand = gen_or_load_feats(hand_features, headlines, bodies, hand_path)
    bow = calculateBOW(headlines, bodies, vocabulary)

    return np.c_[hand, polarity, refuting, overlap, bow], y
Exemplo n.º 6
0
def generate_features(stances, dataset, name):
    """Build baseline plus sentiment features, logging start and end times.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries.
        dataset: object whose ``articles`` is indexed by body ID.
        name: tag inserted into every feature file name.

    Returns:
        ``(X, y)``: ``X`` column-stacks hand, polarity, refuting,
        overlap and sentiment features; ``y`` holds the LABELS indices.
    """
    print('* GENERATING FEATURES *')
    print(datetime.now())

    headlines, bodies, y = [], [], []
    for record in stances:
        y.append(LABELS.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    # The TF-IDF feature is not part of this feature set.
    specs = (
        (word_overlap_features, "features/overlap."),
        (refuting_features, "features/refuting."),
        (polarity_features, "features/polarity."),
        (hand_features, "features/hand."),
        (sentiment_features, "features/sentiment."),
    )
    overlap, refuting, polarity, hand, sentiment = [
        gen_or_load_feats(extractor, headlines, bodies, stem + name + ".npy")
        for extractor, stem in specs
    ]

    X = np.c_[hand, polarity, refuting, overlap, sentiment]

    print('* FINISHED GENERATING FEATURES *')
    print(datetime.now())

    return X, y
Exemplo n.º 7
0
def generate_features(stances, dataset, name):
    """Build the extended feature matrix (sentiment, NER, Q, doc2vec).

    Args:
        stances: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries.
        dataset: object whose ``articles`` is indexed by body ID.
        name: tag inserted into every feature file name.

    Returns:
        ``(X, y)``: ``X`` column-stacks hand, sentiment, polarity,
        refuting, overlap, NER, question-mark and doc2vec features (in
        that order); ``y`` holds the LABELS indices.
    """
    headlines, bodies, y = [], [], []
    for record in stances:
        y.append(LABELS.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    def cached(extractor, stem):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 stem + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    sentiment = cached(sentiment_analyzer, "features/sentiment.")
    hand = cached(hand_features, "features/hand.")
    ner = cached(name_entity_similarity, "features/ner.")
    question = cached(question_mark_ending, "features/Q.")
    doc2vec = cached(doc2vec_feature, "features/doc2vec.")

    stacked = np.c_[hand, sentiment, polarity, refuting, overlap, ner,
                    question, doc2vec]
    return stacked, y
Exemplo n.º 8
0
def generate_features(stances, dataset, name, model, binary=True):
    """Vectorise headlines and bodies and concatenate them column-wise.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries; wrapped in ``tqdm`` while iterating.
        dataset: object whose ``articles`` is indexed by body ID.
        name: accepted for interface compatibility; not used here.
        model: forwarded unchanged to ``buildWordVector``.
        binary: when it compares equal to True, labels are collapsed to
            0 (LABELS index below 3) or 1 (otherwise).

    Returns:
        ``(concatenated, headline, body, y)`` where ``concatenated`` is
        ``np.c_[headline, body]``.
    """
    headline, body, y = [], [], []
    for record in tqdm(stances):
        label_idx = LABELS.index(record['Stance'])
        y.append(label_idx if binary != True else int(label_idx >= 3))

        head_vec = buildWordVector(record["Headline"], model)
        headline.append(head_vec)
        body_vec = buildWordVector(dataset.articles[record["Body ID"]], model)
        body.append(body_vec)

    return np.c_[headline, body], headline, body, y
def generate_features(stances, dataset, name):
    """Build the feature matrix including BERT and cosine features.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries.
        dataset: object whose ``articles`` is indexed by body ID.
        name: tag inserted into every feature file name.

    Returns:
        ``(X, y)``: ``X`` column-stacks refuting, overlap, hand,
        sentiment, NER, polarity, BERT and cosine features (in that
        order); ``y`` holds the LABELS indices.
    """
    records = [(LABELS.index(s['Stance']), s['Headline'],
                dataset.articles[s['Body ID']])
               for s in stances]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    specs = (
        (sentiment_features, "features/sentiment."),
        (ner_features, "features/ner."),
        (word_overlap_features, "features/overlap."),
        (refuting_features, "features/refuting."),
        (polarity_features, "features/polarity."),
        (hand_features, "features/hand."),
    )
    sentiment, ner, overlap, refuting, polarity, hand = [
        gen_or_load_feats(extractor, headlines, bodies, stem + name + ".npy")
        for extractor, stem in specs
    ]

    # These two are read through their own loaders, not gen_or_load_feats.
    bert = bert_features("features/combined_bert_" + name + ".csv")
    cosine = cosine_features(name, "features/cosine." + name + ".npy")

    stacked = np.c_[refuting, overlap, hand, sentiment, ner, polarity,
                    bert, cosine]
    return stacked, y
Exemplo n.º 10
0
def generate_features(stances, dataset, name, bow_vectorizer, tfreq_vectorizer,
                      tfidf_vectorizer):
    """Build hand, polarity, refuting-headline, overlap and TF-cosine features.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries.
        dataset: object whose ``articles`` is indexed by body ID.
        name: tag inserted into the feature file names.
        bow_vectorizer, tfreq_vectorizer, tfidf_vectorizer: forwarded
            unchanged to ``gen_tf_idf_feats``.

    Returns:
        ``(X, y)``: ``X`` column-stacks hand, polarity, headline
        refuting, overlap and TF cosine features; ``y`` holds the
        LABELS indices.
    """
    headlines, bodies, y = [], [], []
    for record in stances:
        y.append(LABELS.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    def cached(extractor, stem):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 stem + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")

    # refuting_features is called directly and yields a headline part and a
    # body part; only the headline part ends up in X.
    refuting_head, refuting_body = refuting_features(headlines, bodies)

    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")

    # Sentiment outputs (headline, body, cosine) are computed but not
    # included in X.
    senti_head, senti_body, senti_cos = sentiment_features(headlines, bodies)

    # TF and TF-IDF cosine similarities; only the TF one is included in X.
    tf_cos, tf_idf_cos = gen_tf_idf_feats(stances, dataset.articles,
                                          bow_vectorizer, tfreq_vectorizer,
                                          tfidf_vectorizer)

    return np.c_[hand, polarity, refuting_head, overlap, tf_cos], y
Exemplo n.º 11
0
def generate_features(stances, dataset, name):
    """Build baseline plus bag-of-words vector/count features.

    Prints a banner with ``name`` before starting and the resulting
    number of feature columns when done.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries.
        dataset: object whose ``articles`` is indexed by body ID.
        name: tag inserted into every feature file name.

    Returns:
        ``(X, y)``: ``X`` column-stacks hand, polarity, refuting,
        overlap, averaged BOW vectors and BOW counts; ``y`` holds the
        LABELS indices.
    """
    print("Generating Features for :", name)

    records = [(LABELS.index(s['Stance']), s['Headline'],
                dataset.articles[s['Body ID']])
               for s in stances]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    def cached(extractor, stem):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 stem + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")
    # The two BOW results are explicitly converted to arrays before stacking.
    bow_vectors = np.array(
        cached(bow_averaged_vectors, "features/bowvec_200dnorm."))
    bow_counts = np.array(
        cached(bow_count_vectors, "features/bowcount_1000."))

    X = np.c_[hand, polarity, refuting, overlap, bow_vectors, bow_counts]
    print("... Done. Features :", X.shape[1])
    return X, y
Exemplo n.º 12
0
def preprocess_and_write(dataset, stance_list, tier, out_dir):
    """Clean, tokenize, shuffle and write one data tier to disk.

    For every stance the headline and its article body (looked up by
    'Body ID' in ``dataset.articles``) are passed through ``clean`` and
    ``get_tokenized_sequences`` and the tokens are joined with spaces.
    The examples are then shuffled with ``np.random.shuffle`` and written
    via ``write_to_file`` to three parallel files in ``out_dir``:
    ``<tier>.headline``, ``<tier>.body`` and ``<tier>.stance``.

    Args:
        dataset: object whose ``articles`` is indexed by body ID.
        stance_list: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries.
        tier: name used in the progress description and as the stem of
            the output file names.
        out_dir: directory the three files are written to.
    """
    examples = []
    for stance in tqdm(stance_list, desc="Preprocessing {}".format(tier)):
        label = LABELS.index(stance['Stance'])
        headline_text = stance['Headline']
        body_text = dataset.articles[stance['Body ID']]

        headline_tokens = get_tokenized_sequences(clean(headline_text))
        body_tokens = get_tokenized_sequences(clean(body_text))
        examples.append(
            (' '.join(headline_tokens), ' '.join(body_tokens), str(label)))

    print("Processed %i examples" % (len(examples)))

    # Shuffle a concrete list of indices: np.random.shuffle works in place
    # and cannot shuffle an immutable Python 3 ``range`` object.
    indices = list(range(len(examples)))
    np.random.shuffle(indices)

    with open(os.path.join(out_dir, tier + '.headline'), 'w') as headline_file, \
         open(os.path.join(out_dir, tier + '.body'), 'w') as body_file, \
         open(os.path.join(out_dir, tier + '.stance'), 'w') as stance_file:

        for i in indices:
            headline, body, stance = examples[i]
            # Line i of each file belongs to the same example.
            write_to_file(headline_file, headline)
            write_to_file(body_file, body)
            write_to_file(stance_file, stance)
Exemplo n.º 13
0
def generate_features(stances, dataset, name):
    """Build key-aware features (hand, similarity, overlap, score, word-vec).

    Besides headline and body text, each stance contributes its 'Key'
    entry and the ``dataset.keys`` entry for its body ID; all four lists
    are handed to ``gen_or_load_feats``.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline',
            'Body ID' and 'Key' entries.
        dataset: object whose ``articles`` and ``keys`` are indexed by
            body ID.
        name: tag inserted into every feature file name.

    Returns:
        ``(X, y)``: ``X`` column-stacks hand, feature-similarity,
        overlap, score and word-vector-similarity features (in that
        order); ``y`` holds the LABELS indices.
    """
    headlines, bodies, head_keys, body_keys, y = [], [], [], [], []
    for record in stances:
        y.append(LABELS.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])
        head_keys.append(record['Key'])
        body_keys.append(dataset.keys[record['Body ID']])

    def cached(extractor, stem):
        return gen_or_load_feats(extractor, headlines, bodies, head_keys,
                                 body_keys, stem + name + ".npy")

    # Refuting and polarity features are not part of this feature set.
    overlap = cached(word_overlap_features, "your--path/features/overlap.")
    hand = cached(hand_features, "your--path/stage1/features/hand.")
    score = cached(score_feature, "your--path/stage1/features/score.")
    wv_sim = cached(word_vec_sim, "your--path/stage1/features/wv_sim.")
    feat_sim = cached(features_sim, "your--path/stage1/features/feat_sim.")

    return np.c_[hand, feat_sim, overlap, score, wv_sim], y
Exemplo n.º 14
0
def _store_feature_columns(frame, X, y, h, b):
    """Write labels, texts and one column per feature of ``X`` into ``frame``."""
    frame['stance'] = y
    frame['headline'] = h
    # NOTE(review): the 'body_id' column receives the article text, not the
    # body ID -- confirm this is intended.
    frame['body_id'] = b
    for i in range(0, X.shape[1]):
        frame[i] = X[:, i]


def generate_features(stances, dataset, name):
    """Build baseline plus topic-modelling (NMF, LDA) features.

    For the "competition" and "full" runs the features are additionally
    copied into the module-level ``comp_feature_data`` /
    ``train_feature_data`` containers, but only while the corresponding
    CSV file ('comp_feature_data.csv' / 'train_feature_data.csv') does
    not exist yet.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries.
        dataset: object whose ``articles`` is indexed by body ID.
        name: tag inserted into every feature file name.

    Returns:
        ``(X, y)``: ``X`` column-stacks hand, polarity, refuting,
        overlap, NMF and LDA features; ``y`` holds the LABELS indices.
    """
    h, b, y = [], [], []
    for stance in stances:
        y.append(LABELS.index(stance['Stance']))
        h.append(stance['Headline'])
        b.append(dataset.articles[stance['Body ID']])

    X_overlap = gen_or_load_feats(word_overlap_features, h, b,
                                  "features/overlap." + name + ".npy")
    X_refuting = gen_or_load_feats(refuting_features, h, b,
                                   "features/refuting." + name + ".npy")
    X_polarity = gen_or_load_feats(polarity_features, h, b,
                                   "features/polarity." + name + ".npy")
    X_hand = gen_or_load_feats(hand_features, h, b,
                               "features/hand." + name + ".npy")

    # Topic-modelling features.
    X_NMF = gen_or_load_feats(NMF_cos_50, h, b,
                              "features/nmf." + name + ".npy")
    X_LDA = gen_or_load_feats(LDA_cos_25, h, b,
                              "features/lda-25." + name + ".npy")

    X = np.c_[X_hand, X_polarity, X_refuting, X_overlap, X_NMF, X_LDA]

    if name == "competition":
        if not os.path.isfile('comp_feature_data.csv'):
            _store_feature_columns(comp_feature_data, X, y, h, b)
    elif name == "full":
        if not os.path.isfile('train_feature_data.csv'):
            _store_feature_columns(train_feature_data, X, y, h, b)
    return X, y
Exemplo n.º 15
0
def generate_features(stances, dataset, name):
    """Build the extended overlap/TF-IDF feature matrix.

    Labels are indices into ``LABELS_RELATED`` when
    ``params.run_2_class`` is truthy and ``name`` is not 'competition';
    in every other case they are indices into ``LABELS``.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries.
        dataset: object whose ``articles`` is indexed by body ID.
        name: tag inserted into every feature file name.

    Returns:
        ``(X, y)``: ``X`` column-stacks the ten feature groups listed
        below in stacking order; ``y`` is the label list.
    """
    headlines, bodies, y = [], [], []
    for record in stances:
        label_space = (LABELS_RELATED
                       if params.run_2_class and name != 'competition'
                       else LABELS)
        y.append(label_space.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    def cached(extractor, stem):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 stem + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")
    overlap_quotes = cached(word_overlap_quotes_features,
                            "features/overlap_quotes.")
    overlap_pos = cached(word_overlap_pos_features, "features/overlap_pos.")
    overlap_pos_sentence = cached(
        word_overlap_split_bodies_features,
        "features/overlap_pos_sentence_split_bodies.")
    tfidf = cached(word_tfidf_features, "features/tfidf_pos.")
    tfidf_max = cached(word_tfidf_pos_ss_features, "features/tfidf_pos_max.")
    overlap_bpe = cached(word_overlap_bpe_features,
                         "features/overlap_bpe_nltk_tag3.")

    stacked = np.c_[hand, polarity, refuting, overlap, overlap_quotes,
                    overlap_pos, overlap_pos_sentence, tfidf, tfidf_max,
                    overlap_bpe]
    return stacked, y
def init_features(stances, dataset, repl):
    """Collect IDs, headlines, bodies and labels from ``stances``.

    Args:
        stances: iterable of mappings with 'Stance ID', 'Stance',
            'Headline' and 'Body ID' entries.
        dataset: object whose ``articles`` is indexed by body ID.
        repl: container of stance replacements; a stance found in it is
            replaced by ``repl[stance]`` before the LABELS lookup.

    Returns:
        ``(ids, headlines, bodies, labels)`` as four parallel lists.
    """
    stance_ids, headlines, bodies, labels = [], [], [], []

    for record in stances:
        stance_ids.append(record['Stance ID'])
        raw_stance = record['Stance']
        if raw_stance in repl:
            effective_stance = repl[raw_stance]
        else:
            effective_stance = raw_stance
        labels.append(LABELS.index(effective_stance))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    return stance_ids, headlines, bodies, labels
Exemplo n.º 17
0
def generate_features(stances, dataset, name, model, mode, binary=True):
    """Build TF-IDF-aware word vectors for every headline/body pair.

    For each stance a fresh ``TfidfVectorizer`` is fitted on just two
    documents (the cleaned headline and the cleaned body); its
    vocabulary and TF-IDF matrix are handed to ``buildWordVector`` along
    with the raw texts.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries; wrapped in ``tqdm`` while iterating.
        dataset: object whose ``articles`` is indexed by body ID.
        name: accepted for interface compatibility; not used here.
        model, mode: forwarded unchanged to ``buildWordVector``.
        binary: when it compares equal to True, labels are collapsed to
            0 (LABELS index below 3) or 1 (otherwise).

    Returns:
        ``(concatenated, headline, body, y)`` where ``concatenated`` is
        ``np.c_[headline, body]``.
    """
    headline, body, y = [], [], []
    for record in tqdm(stances):
        label_idx = LABELS.index(record['Stance'])
        y.append(label_idx if binary != True else int(label_idx >= 3))

        # Two-document corpus: cleaned headline first, cleaned body second.
        pair_corpus = [
            ' '.join(cleantext(record["Headline"])),
            ' '.join(cleantext(dataset.articles[record["Body ID"]])),
        ]
        vectorizer = TfidfVectorizer(min_df=1, tokenizer=None)
        tfidf = vectorizer.fit_transform(pair_corpus)
        vocab = vectorizer.vocabulary_

        # flag=1 is passed for the headline, flag=0 for the body.
        head_vec = buildWordVector(record["Headline"], model, mode, vocab,
                                   tfidf, flag=1)
        headline.append(head_vec)
        body_vec = buildWordVector(dataset.articles[record["Body ID"]], model,
                                   mode, vocab, tfidf, flag=0)
        body.append(body_vec)

    return np.c_[headline, body], headline, body, y
Exemplo n.º 18
0
def compute_ub(slaves, stances):
    """Print the oracle upper bound of a set of classifiers.

    An example counts as an oracle hit when at least one classifier in
    ``slaves`` predicts its true label. Three lines are printed: the
    number of hits, the number of examples, and their ratio.

    Args:
        slaves: iterable of classifiers exposing ``predict(stances)``,
            each returning one label (a member of LABELS) per stance.
        stances: sequence of mappings with a 'Stance' entry.
    """
    actual = [LABELS.index(stance['Stance']) for stance in stances]

    per_classifier = [
        [LABELS.index(p) for p in classifier.predict(stances)]
        for classifier in slaves
    ]
    # Transpose so that entry i holds every classifier's vote for example i.
    per_example = list(zip(*per_classifier))

    oracle = sum(1 for i, cls in enumerate(actual) if cls in per_example[i])

    print(oracle)
    print(len(actual))

    if actual:
        # float() keeps the ratio fractional under Python 2 as well.
        print(float(oracle) / len(actual))
    else:
        # No examples: report 0.0 instead of raising ZeroDivisionError.
        print(0.0)
Exemplo n.º 19
0
def generate_features(stances, dataset, name):
    """Build the feature matrix and labels for ``stances``.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries.
        dataset: object whose ``articles`` is indexed by body ID.
        name: tag inserted into the feature file name.

    Returns:
        ``(X, y)``: ``X`` column-stacks hand, polarity, refuting and
        overlap features; ``y`` holds the LABELS indices.
    """
    headlines, bodies, y = [], [], []
    for record in stances:
        y.append(LABELS.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    overlap_path = "features/overlap." + name + ".npy"
    X_overlap = gen_or_load_feats(word_overlap_features, headlines, bodies,
                                  overlap_path)

    # NOTE(review): X_hand, X_polarity and X_refuting are never assigned in
    # this function; unless they exist as module-level names this line
    # raises NameError -- confirm and compute them here if needed.
    return np.c_[X_hand, X_polarity, X_refuting, X_overlap], y
Exemplo n.º 20
0
def generate_features(stances, dataset, name):
    """Build an embedding-only feature matrix for ``stances``.

    Also prints the type of the first element of the resulting matrix.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries.
        dataset: object whose ``articles`` is indexed by body ID.
        name: tag inserted into the feature file name.

    Returns:
        ``(X, y)``: ``X`` is ``np.c_`` applied to the embedding features;
        ``y`` holds the LABELS indices.
    """
    records = [(LABELS.index(s['Stance']), s['Headline'],
                dataset.articles[s['Body ID']])
               for s in stances]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    embedding_path = "features/embedding." + name + ".npy"
    embedding = gen_or_load_feats(word_features, headlines, bodies,
                                  embedding_path)

    X = np.c_[embedding]
    first_cell = X[0][0]
    print(type(first_cell))
    return X, y
Exemplo n.º 21
0
def generate_features(stances,dataset,name):
    """Build the four baseline feature groups and labels for ``stances``.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries.
        dataset: object whose ``articles`` is indexed by body ID.
        name: tag inserted into every feature file name.

    Returns:
        ``(X, y)``: ``X`` column-stacks hand, polarity, refuting and
        overlap features; ``y`` holds the LABELS indices.
    """
    headlines, bodies, y = [], [], []
    for record in stances:
        y.append(LABELS.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    specs = (
        (word_overlap_features, "features/overlap."),
        (refuting_features, "features/refuting."),
        (polarity_features, "features/polarity."),
        (hand_features, "features/hand."),
    )
    overlap, refuting, polarity, hand = [
        gen_or_load_feats(extractor, headlines, bodies, stem+name+".npy")
        for extractor, stem in specs
    ]

    return np.c_[hand, polarity, refuting, overlap], y
Exemplo n.º 22
0
def generate_features(stances,dataset,name):
    """Build hand, overlap, TF-IDF and sentiment features for ``stances``.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries.
        dataset: object whose ``articles`` is indexed by body ID.
        name: tag inserted into every feature file name.

    Returns:
        ``(X, y)``: ``X`` column-stacks hand, overlap, TF-IDF and
        sentiment features; ``y`` holds the LABELS indices.
    """
    records = [(LABELS.index(s['Stance']), s['Headline'],
                dataset.articles[s['Body ID']])
               for s in stances]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    def cached(extractor, stem):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 stem+name+".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    hand = cached(hand_features, "features/hand.")
    tf_idf = cached(tf_idf_features, "features/tf_idf.")
    sentiment = cached(sentiment_features, "features/sentiment.")

    return np.c_[hand, overlap, tf_idf, sentiment], y
Exemplo n.º 23
0
def generate_features_second_layer(stances, dataset, name):
    """Build the second-layer feature matrix (baseline plus BOW overlap).

    Args:
        stances: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries.
        dataset: object whose ``articles`` is indexed by body ID.
        name: tag inserted into every feature file name.

    Returns:
        ``(X, y)``: ``X`` column-stacks hand, refuting, polarity,
        overlap and bag-of-words overlap features (in that order);
        ``y`` holds the LABELS indices.
    """
    headlines, bodies, y = [], [], []
    for record in stances:
        y.append(LABELS.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    specs = (
        (word_overlap_features, "features/overlap."),
        (refuting_features, "features/refuting."),
        (polarity_features, "features/polarity."),
        (hand_features, "features/hand."),
        (bow_overlap_features, "features/bow_overlap."),
    )
    overlap, refuting, polarity, hand, bow_overlap = [
        gen_or_load_feats(extractor, headlines, bodies, stem + name + ".npy")
        for extractor, stem in specs
    ]

    return np.c_[hand, refuting, polarity, overlap, bow_overlap], y
Exemplo n.º 24
0
def generate_features(stances,dataset,name):
    """Build baseline features plus agree/discuss features.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries.
        dataset: object whose ``articles`` is indexed by body ID.
        name: tag inserted into every feature file name.

    Returns:
        ``(X, y)``: ``X`` column-stacks hand, polarity, discuss, agree,
        refuting and overlap features (in that order); ``y`` holds the
        LABELS indices.
    """
    headlines, bodies, y = [], [], []
    for record in stances:
        y.append(LABELS.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    def cached(extractor, stem):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 stem + name + ".npy")

    overlap = cached(word_overlap_features, "features/overlap.")
    refuting = cached(refuting_features, "features/refuting.")
    agree = cached(agree_features, "features/agree.")
    discuss = cached(discuss_features, "features/discuss.")
    polarity = cached(polarity_features, "features/polarity.")
    hand = cached(hand_features, "features/hand.")

    return np.c_[hand, polarity, discuss, agree, refuting, overlap], y
Exemplo n.º 25
0
def generate_features(stances, dataset, name):
    """Build the entity feature matrix and labels for ``stances``.

    Only the ``entity`` extractor is used; the baseline (overlap,
    refuting, polarity, hand) and word2vec features are not computed.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries.
        dataset: object whose ``articles`` is indexed by body ID.
        name: tag inserted into the feature file name.

    Returns:
        ``(X, y)``: ``X`` is whatever ``gen_or_load_feats`` returns for
        the entity extractor; ``y`` holds the LABELS indices.
    """
    records = [(LABELS.index(s['Stance']), s['Headline'],
                dataset.articles[s['Body ID']])
               for s in stances]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    entity_path = "features/entity." + name + ".npy"
    return gen_or_load_feats(entity, headlines, bodies, entity_path), y
Exemplo n.º 26
0
def generate_features(stances, dataset, name):
    """Build baseline plus TF-IDF, SVD and sentiment features.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries.
        dataset: object whose ``articles`` is indexed by body ID.
        name: tag inserted into every feature file name.

    Returns:
        ``(X, y)``: ``X`` column-stacks hand, polarity, refuting,
        overlap, TF-IDF, SVD and sentiment features (in that order);
        ``y`` holds the LABELS indices.
    """
    headlines, bodies, y = [], [], []
    for record in stances:
        y.append(LABELS.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    specs = (
        (word_overlap_features, "features/overlap."),
        (refuting_features, "features/refuting."),
        (polarity_features, "features/polarity."),
        (hand_features, "features/hand."),
        (tf_idf_features, "features/tf_idf."),
        (svd_features, "features/svd."),
        (sentiment_features, "features/sentiment."),
    )
    overlap, refuting, polarity, hand, tf_idf, svd, sentiment = [
        gen_or_load_feats(extractor, headlines, bodies, stem + name + ".npy")
        for extractor, stem in specs
    ]

    stacked = np.c_[hand, polarity, refuting, overlap, tf_idf, svd,
                    sentiment]
    return stacked, y
Exemplo n.º 27
0
def generate_features(stances, dataset, name):
    """Build baseline plus word2vec headline/body features.

    Every feature file name is prefixed with the module-level
    ``FILENAME`` value.

    Args:
        stances: iterable of mappings with 'Stance', 'Headline' and
            'Body ID' entries.
        dataset: object whose ``articles`` is indexed by body ID.
        name: tag inserted into every feature file name.

    Returns:
        ``(X, y)``: ``X`` column-stacks hand, polarity, refuting,
        overlap, headline w2v and body w2v features (in that order);
        ``y`` holds the LABELS indices.
    """
    headlines, bodies, y = [], [], []
    for record in stances:
        y.append(LABELS.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    def cached(extractor, stem):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 FILENAME + stem + name + ".npy")

    # The w2v extractor's result unpacks into a headline and a body part.
    head_w2v, body_w2v = cached(gen_w2v, "w2v.")
    overlap = cached(word_overlap_features, "overlap.")
    refuting = cached(refuting_features, "refuting.")
    polarity = cached(polarity_features, "polarity.")
    hand = cached(hand_features, "hand.")

    return np.c_[hand, polarity, refuting, overlap, head_w2v, body_w2v], y
Exemplo n.º 28
0
def generate_features(stances, dataset, name, only_related=False):
    """Build the extended feature matrix with 4-class and binary labels.

    Args:
        stances: iterable of mappings with 'Stance', 'Stance_biClass',
            'Headline' and 'Body ID' entries.
        dataset: object whose ``articles`` is indexed by body ID.
        name: tag inserted into every feature file name.
        only_related: when truthy, every feature file name gets the
            extra "re_" prefix after "features/".

    Returns:
        ``(X, y, y_bi)``: ``X`` column-stacks hand, sentiment, polarity,
        refuting, overlap, NER, Q and doc2vec features; ``y`` holds the
        LABELS indices and ``y_bi`` the 'Stance_biClass' values.
    """
    file_prefix = "features/" + ("re_" if only_related else "")

    headlines, bodies, y, y_bi = [], [], [], []
    for record in stances:
        y_bi.append(record['Stance_biClass'])
        y.append(LABELS.index(record['Stance']))
        headlines.append(record['Headline'])
        bodies.append(dataset.articles[record['Body ID']])

    def cached(extractor, stem):
        return gen_or_load_feats(extractor, headlines, bodies,
                                 file_prefix + stem + name + ".npy")

    overlap = cached(word_overlap_features, "overlap.")
    refuting = cached(refuting_features, "refuting.")
    polarity = cached(polarity_features, "polarity.")
    sentiment = cached(sentiment_analyzer, "sentiment.")
    hand = cached(hand_features, "hand.")
    ner = cached(name_entity_similarity, "ner.")
    # NOTE(review): the "Q." feature reuses name_entity_similarity, the same
    # extractor as "ner." above -- possibly a copy-paste slip; confirm which
    # extractor was intended.
    question = cached(name_entity_similarity, "Q.")
    doc2vec = cached(doc2vec_feature, "doc2vec.")

    stacked = np.c_[hand, sentiment, polarity, refuting, overlap, ner,
                    question, doc2vec]
    return stacked, y, y_bi
Exemplo n.º 29
0
def generate_features(stances, dataset, name):
    """Build the feature matrix and label list for a set of stances.

    Args:
        stances: iterable of mappings providing the 'Stance', 'Headline'
            and 'Body ID' keys.
        dataset: object whose ``articles`` is indexed by body id.
        name: tag embedded in every feature file name.

    Returns:
        Tuple ``(X, y)``: ``X`` is the column-wise concatenation
        (``np.c_``) of the feature groups and ``y`` holds each stance's
        index in ``LABELS``.
    """
    headlines = []
    bodies = []
    labels = []
    for entry in stances:
        labels.append(LABELS.index(entry['Stance']))
        headlines.append(entry['Headline'])
        bodies.append(dataset.articles[entry['Body ID']])

    def feature_file(kind):
        # Every feature group is stored under features/<kind>.<name>.npy
        return "features/" + kind + "." + name + ".npy"

    overlap = gen_or_load_feats(
        word_overlap_features, headlines, bodies, feature_file("overlap"))
    refuting = gen_or_load_feats(
        refuting_features, headlines, bodies, feature_file("refuting"))
    polarity = gen_or_load_feats(
        polarity_features, headlines, bodies, feature_file("polarity"))
    hand = gen_or_load_feats(
        hand_features, headlines, bodies, feature_file("hand"))
    # The word2vec body features live in the "word" file, the headline
    # ones in the "head" file.
    w2v_body = gen_or_load_feats(
        w2v_body_feature_features, headlines, bodies, feature_file("word"))
    w2v_head = gen_or_load_feats(
        w2v_head_feature_features, headlines, bodies, feature_file("head"))

    X = np.c_[hand, polarity, refuting, overlap, w2v_body, w2v_head]
    return X, labels
Exemplo n.º 30
0
def generate_features(stances, dataset, name, number_of_words=5000):
    """Build hand-crafted, bag-of-words and InferSent features for stances.

    Args:
        stances: iterable of mappings providing the 'Stance', 'Headline'
            and 'Body ID' keys.
        dataset: object whose ``articles`` is indexed by body id.
        name: tag embedded in every feature file name; the value "vocab"
            skips feature construction entirely.
        number_of_words: forwarded to ``vocabularyForm`` as its third
            argument.

    Returns:
        Tuple ``(X, y)`` where ``X`` is the column-wise concatenation
        (``np.c_``) of all feature groups and ``y`` holds each stance's
        index in ``LABELS``; ``None`` when ``name == "vocab"``.
    """
    records = [
        (
            LABELS.index(entry['Stance']),
            entry['Headline'],
            dataset.articles[entry['Body ID']],
        )
        for entry in stances
    ]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    vocabulary = vocabularyForm(headlines, bodies, number_of_words)

    # NOTE(review): for name == "vocab" nothing is built and the caller gets
    # None rather than an (X, y) pair -- confirm this is intended.
    if name == "vocab":
        return None

    def feature_file(kind):
        return "features/" + kind + "." + name + ".npy"

    overlap = gen_or_load_feats(word_overlap_features, headlines, bodies,
                                feature_file("overlap"))
    refuting = gen_or_load_feats(refuting_features, headlines, bodies,
                                 feature_file("refuting"))
    polarity = gen_or_load_feats(polarity_features, headlines, bodies,
                                 feature_file("polarity"))
    hand = gen_or_load_feats(hand_features, headlines, bodies,
                             feature_file("hand"))
    bow = calculateBOW(headlines, bodies, vocabulary)

    def keep_storage(storage, loc):
        # map_location hook: hand back the storage object unchanged.
        return storage

    # NOTE(review): torch.load unpickles the file; only load model files
    # from a trusted source.
    infersent = torch.load('infersent.allnli.pickle',
                           map_location=keep_storage)
    infersent.set_glove_path('dataset/glove.840B.300d.txt')

    # Only the headlines are encoded; headlines and bodies always have the
    # same length here, so a plain copy of the headline list suffices.
    sentences = list(headlines)
    infersent.build_vocab(sentences, tokenize=True)
    embedded = infersent.encode(sentences, tokenize=True)

    X = np.c_[hand, polarity, refuting, overlap, bow, embedded]
    return X, y
Exemplo n.º 31
0
def generate_features(stances, dataset, name):
    """Build the combined feature matrix and label list for stances.

    Args:
        stances: iterable of mappings providing the 'Stance', 'Headline'
            and 'Body ID' keys.
        dataset: object whose ``articles`` is indexed by body id.
        name: tag embedded in every feature file name.

    Returns:
        Tuple ``(X, y)``: ``X`` is the column-wise concatenation
        (``np.c_``) of the hand, polarity, refuting, overlap, tf-idf, SVD
        and sentiment feature groups, in that order; ``y`` holds each
        stance's index in ``LABELS``.
    """
    records = [
        (
            LABELS.index(entry['Stance']),
            entry['Headline'],
            dataset.articles[entry['Body ID']],
        )
        for entry in stances
    ]
    y = [rec[0] for rec in records]
    headlines = [rec[1] for rec in records]
    bodies = [rec[2] for rec in records]

    def cached(extractor, kind):
        # presumably gen_or_load_feats computes the features or reloads them
        # from the given .npy path -- confirm against its definition
        return gen_or_load_feats(extractor, headlines, bodies,
                                 "features/" + kind + "." + name + ".npy")

    overlap = cached(word_overlap_features, "overlap")
    refuting = cached(refuting_features, "refuting")
    polarity = cached(polarity_features, "polarity")
    hand = cached(hand_features, "hand")
    tf_idf = cached(tf_idf_features, "tf_idf")
    svd = cached(svd_features, "svd")
    sentiment = cached(sentiment_features, "sentiment")

    X = np.c_[hand, polarity, refuting, overlap, tf_idf, svd, sentiment]
    return X, y