__author__ = "hs"
__author__ = "hs"
__author__ = "NLP-PC"
from load_data import load_selected_data
from vectorizers import TFIDF_estimator
from classifiers import mNB
from analysis import analysis_result

print("Start")
vectorizer = TFIDF_estimator()
texts, train_labels = load_selected_data(data_type="train")
transformed_train = vectorizer.fit_transform(texts)
testdata, true_labels = load_selected_data(data_type="test")
transformed_test = vectorizer.transform(testdata)

predict = mNB(transformed_train, train_labels, transformed_test)

analysis_result(predict, true_labels)
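
# A minimal sketch of what classifiers.mNB could look like, assuming it wraps
# scikit-learn's MultinomialNB (the actual implementation is not shown here):
#
#     from sklearn.naive_bayes import MultinomialNB
#
#     def mNB(train_data, train_labels, test_data):
#         clf = MultinomialNB()              # multinomial NB over TF-IDF features
#         clf.fit(train_data, train_labels)  # learn class-conditional word weights
#         return clf.predict(test_data)      # predicted labels for the test set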
import numpy as np


def avg_valence(texts, words, valence):
    # Mean valence of the lexicon words found in each text; -1 if none occur.
    texts_scores = []
    for text in texts:
        word_count = np.array([text.split().count(w) for w in words])
        occur_times = (word_count >= 1).sum()
        if occur_times > 0:
            avg = np.average(np.array(valence)[word_count >= 1])
        else:
            avg = -1
        texts_scores.append(avg)
    return texts_scores
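
# Usage sketch with a toy lexicon (ratings are illustrative, not real ANEW values):
#   avg_valence(["a happy day", "nothing matches"],
#               np.array(["happy", "sad"]), np.array([8.0, 2.0]))
#   returns [8.0, -1]: only "happy" occurs in the first text, and the second
#   text contains no lexicon words at all.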


if __name__ == '__main__':
    from load_data import load_selected_data, load_anew

    texts, labels = load_selected_data(data_type='train', stem=False)
    words, valence, _ = load_anew()

    # Split the ANEW lexicon at its mean valence: words rated above the mean
    # count as positive, words rated below it as negative.
    words, valence = np.array(words), np.array(valence)
    mean_valence = np.average(valence)  # avg = 5.1511713456
    words_pos, valence_pos = words[valence > mean_valence], valence[valence > mean_valence]
    words_neg, valence_neg = words[valence < mean_valence], valence[valence < mean_valence]

    # Score every text by the mean valence of the positive and of the
    # negative lexicon words it contains.
    pos = avg_valence(texts, words_pos, valence_pos)
    neg = avg_valence(texts, words_neg, valence_neg)

    from visualization import draw_scatter_with_color
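    # Hypothetical final step, assuming draw_scatter_with_color accepts x/y
    # values plus per-point labels (its real signature is not shown here):
    # draw_scatter_with_color(pos, neg, labels)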