from load_data import load_CVAW
from load_data import load_NTUSD


def save_list(filename, items):
    """Write each non-empty string in *items* to *filename*, one per line (UTF-8)."""
    with open(filename, 'w', encoding='utf-8') as f:
        f.writelines(s + "\n" for s in items if s != "")


# Load the CVAW lexicon and keep only the word column (index 0 of each row).
lexicon_data = load_CVAW()
cvaw_words = [line[0] for line in lexicon_data]
print('The words in CVAW lexicons: %s' % str(cvaw_words))

# NTUSD positive/negative sentiment word lists (traditional Chinese).
NTUSD_positive_words = load_NTUSD('./resources/ntusd-positive (zh-tw).txt')
NTUSD_negative_words = load_NTUSD('./resources/ntusd-negative (zh-tw).txt')
print('NTUSD')
print("Positive: %s" % str(NTUSD_positive_words))
print("Negative: %s" % str(NTUSD_negative_words))

# Build each set once instead of reconstructing it for every set operation.
cvaw_set = set(cvaw_words)
pos_set = set(NTUSD_positive_words)
neg_set = set(NTUSD_negative_words)

# Overlap between NTUSD (per polarity) and CVAW.
print('same words in ntusd_positive')
print(sorted(pos_set & cvaw_set))
print('same words in ntusd_negative')
print(sorted(neg_set & cvaw_set))

# Words shared between NTUSD (either polarity) and CVAW:
# (pos & cvaw) | (neg & cvaw) == (pos | neg) & cvaw by distributivity.
common_words = sorted((pos_set | neg_set) & cvaw_set)
# NTUSD words missing from CVAW, per polarity and combined.
ntusd_p = sorted(pos_set - cvaw_set)
ntusd_n = sorted(neg_set - cvaw_set)
not_in_CVAW = sorted((pos_set | neg_set) - cvaw_set)

save_list('./resources/common_words.txt', common_words)
save_list('./resources/different_words.txt', not_in_CVAW)
save_list('./resources/NTUSD_p.txt', ntusd_p)
print('The predicted values is (using Geometric Average): %s'% predicted_value_g) geometric.append(predicted_value_g) print('The true values is: %s' % true_values[i]) return arithmetic, geometric if __name__ == '__main__': ########################################### Hyper-parameters ########################################### target = 'arousal' # values: "valence", "arousal" categorical = 'all' # values: 'all', "book", "car", "laptop", "hotel", "news", "political" ######################################################################################################## # texts, valence, arousal = read_mix_data(categorical) from load_data import load_CVAT_3 # texts, valence, arousal = load_CVAT_3('./resources/corpus 2009 sigma 1.5.csv','./resources/tokenized_texts.p', categorical=categorical) texts, valence, arousal = load_CVAT_3('./resources/CVAT (utf-8).csv','./resources/tokenized_texts_(newest3.31).p', categorical=categorical) lexicon = load_CVAW() d = dict() if target == 'valence': ind = 1 true_values = valence print('Valence prediction...') elif target == 'arousal': ind = 2 true_values = arousal print('Arousal preddiction...') for l in lexicon: d[l[0]] = l[ind] arithmetic, geometric = va_prediction(texts, d, true_values) print('Prediction result (arithmetic average):') regression_evaluate(true_values, arithmetic)
# texts, valence, arousal = load_CVAT_2("./resources/valence_arousal(sigma=1.5).csv", categorical=categorical) from load_data import load_CVAT_3 # texts, valence, arousal = load_CVAT_3('./resources/corpus 2009 sigma 1.5.csv','./resources/tokenized_texts.p', categorical=categorical) # texts, valence, arousal = load_CVAT_3('./resources/valence_arousal(sigma=1.5).csv','./resources/tokenized_texts_(old).p', categorical=categorical) from mix_data import read_mix_data texts, valence, arousal = read_mix_data(categorical) if option == 'V': Y = valence elif option == 'A': Y = arousal else: raise Exception('Wrong parameters!') lexicon = load_CVAW(extended=using_extended_lexicon) d = dict() ind = 1 if option == 'V' else 2 for l in lexicon: d[l[0]] = l[ind] predicted_ratings = mean_ratings(texts, d, mean_method, Y) print(predicted_ratings) print(Y) out = regression_evaluate(Y, predicted_ratings) draw_scatter(Y, predicted_ratings, 'True Values', 'Predicted Values', title='Scatter') out2 = cv(predicted_ratings, Y) Dims = 'Valence' if option == 'V' else 'Arousal'
# texts, valence, arousal = load_CVAT_2("./resources/valence_arousal(sigma=1.5).csv", categorical=categorical) from load_data import load_CVAT_3 # texts, valence, arousal = load_CVAT_3('./resources/corpus 2009 sigma 1.5.csv','./resources/tokenized_texts.p', categorical=categorical) # texts, valence, arousal = load_CVAT_3('./resources/valence_arousal(sigma=1.5).csv','./resources/tokenized_texts_(old).p', categorical=categorical) from mix_data import read_mix_data texts, valence, arousal = read_mix_data(categorical) if option == 'V': Y = valence elif option == 'A': Y = arousal else: raise Exception('Wrong parameters!') lexicon = load_CVAW(extended=using_extended_lexicon) d = dict() ind = 1 if option == 'V' else 2 for l in lexicon: d[l[0]] = l[ind] predicted_ratings = mean_ratings(texts, d, mean_method, Y) print(predicted_ratings) print(Y) out = regression_evaluate(Y, predicted_ratings) draw_scatter(Y, predicted_ratings, 'True Values', 'Predicted Values', title='Scatter')
from load_data import load_CVAW
from load_data import load_NTUSD


def save_list(filename, list):
    """Write each non-empty string in *list* to *filename*, one per line (UTF-8).

    NOTE(review): the parameter name shadows the builtin ``list``.
    """
    with open(filename, 'w', encoding='utf-8') as f:
        for l in list:
            if l != "":
                f.write(l + "\n")


# Load the CVAW lexicon; keep only the word column (index 0 of each row).
lexicon_data = load_CVAW()
cvaw_words = [line[0] for line in lexicon_data]
print('The words in CVAW lexicons: %s' % str(cvaw_words))
# NTUSD positive/negative sentiment word lists (traditional Chinese).
NTUSD_positive_words = load_NTUSD('./resources/ntusd-positive (zh-tw).txt')
NTUSD_negative_words = load_NTUSD('./resources/ntusd-negative (zh-tw).txt')
print('NTUSD')
print("Positive: %s" % str(NTUSD_positive_words))
print("Negative: %s" % str(NTUSD_negative_words))
# Overlap between NTUSD (per polarity) and CVAW.
print('same words in ntusd_postive')  # sic: 'postive' typo in the runtime message
print(sorted(list(set(NTUSD_positive_words) & set(cvaw_words))))
print('same words in ntusd_negative')
print(sorted(list(set(NTUSD_negative_words) & set(cvaw_words))))
# Words shared between NTUSD (either polarity) and CVAW.
common_words = sorted(
    list((set(NTUSD_positive_words) & set(cvaw_words)) | (set(NTUSD_negative_words) & set(cvaw_words))))
# NTUSD words missing from the CVAW lexicon, per polarity.
ntusd_p = (sorted(list(set(NTUSD_positive_words) - set(cvaw_words))))
ntusd_n = (sorted(list(set(NTUSD_negative_words) - set(cvaw_words))))
# NOTE(review): this chunk is truncated mid-statement — the call below
# continues past the visible region, so it is left exactly as found.
not_in_CVAW = sorted(
if __name__ == '__main__':
    ########################################### Hyper-parameters ###########################################
    target = 'arousal'  # values: "valence", "arousal"
    categorical = 'all'  # values: 'all', "book", "car", "laptop", "hotel", "news", "political"
    ########################################################################################################
    # texts, valence, arousal = read_mix_data(categorical)
    from load_data import load_CVAT_3
    # texts, valence, arousal = load_CVAT_3('./resources/corpus 2009 sigma 1.5.csv','./resources/tokenized_texts.p', categorical=categorical)
    texts, valence, arousal = load_CVAT_3(
        './resources/CVAT (utf-8).csv',
        './resources/tokenized_texts_(newest3.31).p',
        categorical=categorical)
    lexicon = load_CVAW()
    d = dict()
    # Column 1 of each lexicon row is read for valence, column 2 for arousal
    # -- TODO confirm against load_CVAW's row layout.
    if target == 'valence':
        ind = 1
        true_values = valence
        print('Valence prediction...')
    elif target == 'arousal':
        ind = 2
        true_values = arousal
        print('Arousal prediction...')  # fixed typo: was 'preddiction'
    # Map each lexicon word to its rating in the chosen dimension.
    for l in lexicon:
        d[l[0]] = l[ind]
    arithmetic, geometric = va_prediction(texts, d, true_values)
    print('Prediction result (arithmetic average):')
    regression_evaluate(true_values, arithmetic)