def tweetscore(sentence):
    """Return a signed integer score for *sentence*.

    The sentence is converted to features by
    ``feature_extract.dialogue_act_features`` (together with the module-level
    ``topic_mod``), vectorised with ``vec`` and handed to
    ``classifier.decision_function``.  The first decision value is passed
    through a logistic function and rescaled so that a decision value of 0
    gives 0 and the result lies between -100 and 100.
    """
    vectorised = vec.transform(
        feature_extract.dialogue_act_features(sentence, topic_mod)
    )
    raw_score = classifier.decision_function(vectorised)[0]

    # Logistic squashing of the raw decision value into the 0..1 range.
    sigmoid = 1.0 / (1.0 + np.exp(-raw_score))

    # Centre on zero, then stretch to the -100..100 range before rounding.
    return int(round(2.0 * (sigmoid - 0.5) * 100.0))
def tweetscore(sentence):
    """Score *sentence* as an integer between -100 and 100.

    Steps: extract features with ``feature_extract.dialogue_act_features``
    (using the module-level ``topic_mod``), vectorise them with ``vec``, take
    the first value returned by ``classifier.decision_function``, apply a
    logistic function and map the outcome onto a zero-centred percentage.
    """
    feats = feature_extract.dialogue_act_features(sentence, topic_mod)
    decision = classifier.decision_function(vec.transform(feats))[0]

    logistic = 1.0 / (1.0 + np.exp(-decision))  # value in the 0..1 range
    scaled = 2.0 * (logistic - 0.5) * 100.0     # zero-centred, -100..100
    return int(round(scaled))
# Fit the topic model on the positive and negative tweets together.
# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print('Training topics')
topic_mod = topic.topic(nbtopic=200, alpha='symmetric')
topic_mod.fit(np.concatenate((pos_data, neg_data)))

print('Feature eng')
# label set: entry 0 is attached to neg_data, entry 1 to pos_data
cls_set = ['Non-Sarcastic', 'Sarcastic']
featuresets = []

# Build one (features, label) pair per tweet: positives first, then negatives.
for label, tweets, progress_msg in (
        (cls_set[1], pos_data, "Positive tweet processed: "),
        (cls_set[0], neg_data, "Negative tweet processed: ")):
    for index, tweet in enumerate(tweets):
        if index % 10000 == 0:
            # Progress report every 10000 tweets.  "%s %s" puts one extra
            # space between message and counter, so the line is identical
            # whichever interpreter version runs the script.
            print("%s %s" % (progress_msg, index))
        featuresets.append(
            (feature_extract.dialogue_act_features(tweet, topic_mod), label))

# Two-column array: column 0 holds the features, column 1 the label.
featuresets = np.array(featuresets)
# 1 for 'Sarcastic' rows, 0 otherwise.
targets = (featuresets[:, 1] == 'Sarcastic').astype(int)

print('Dictionnary vectorizer')
vec = DictVectorizer()
featurevec = vec.fit_transform(featuresets[:, 0])
from sklearn.metrics import classification_report
from sklearn.feature_extraction import DictVectorizer
import pickle
import feature_extract
import heapq

# Tweets of each class, loaded from .npy files in the working directory.
pos_data = np.load('posproc.npy')
neg_data = np.load('negproc.npy')

# Entry 0 is attached to neg_data, entry 1 to pos_data.
class_set = ['Non-Sarcastic', 'Sarcastic']
featuresets = []

# Build one (features, label) pair per tweet: positives first, then negatives.
for label, tweets, progress_msg in (
        (class_set[1], pos_data, "Positive tweet processed: "),
        (class_set[0], neg_data, "Negative tweet processed: ")):
    for index, tweet in enumerate(tweets):
        if index % 10000 == 0:
            # Progress report every 10000 tweets.  "%s %s" puts one extra
            # space between message and counter, so the line is identical
            # on Python 2 and Python 3.
            print("%s %s" % (progress_msg, index))
        featuresets.append(
            (feature_extract.dialogue_act_features(tweet), label))

# Two-column array: column 0 holds the features, column 1 the label.
featuresets = np.array(featuresets)
# 1 for 'Sarcastic' rows, 0 otherwise.
targets = (featuresets[:, 1] == 'Sarcastic').astype(int)

vec = DictVectorizer()
featurevec = vec.fit_transform(featuresets[:, 0])

file_Name = "vecdict.p"
# Train the topic model on every tweet, regardless of class.
print('Training topics')
topic_mod = topic.topic(nbtopic=200, alpha='symmetric')
all_tweets = np.concatenate((pos_data, neg_data))
topic_mod.fit(all_tweets)

print('Feature eng')
# label set: entry 0 goes with neg_data, entry 1 with pos_data
cls_set = ['Non-Sarcastic', 'Sarcastic']
featuresets = []

# Positive tweets are paired with cls_set[1].
for count, tweet in enumerate(pos_data):
    if count % 10000 == 0:
        print("Positive tweet processed: ", count)  # progress every 10000
    tweet_features = feature_extract.dialogue_act_features(tweet, topic_mod)
    featuresets.append((tweet_features, cls_set[1]))

# Negative tweets are paired with cls_set[0].
for count, tweet in enumerate(neg_data):
    if count % 10000 == 0:
        print("Negative tweet processed: ", count)  # progress every 10000
    tweet_features = feature_extract.dialogue_act_features(tweet, topic_mod)
    featuresets.append((tweet_features, cls_set[0]))

# Two-column array: column 0 holds the features, column 1 the label.
featuresets = np.array(featuresets)
label_column = featuresets[:, 1]
# 1 for 'Sarcastic' rows, 0 otherwise.
targets = (label_column == 'Sarcastic').astype(int)

print('Dictionnary vectorizer')
vec = DictVectorizer()
# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print('Before Pickling')

# NOTE(security): pickle can run arbitrary code while loading; only load
# model files that were produced by this project.
# Pickle data must be read in binary mode, and the context managers release
# the handles even when loading fails.
with open('vecdict.p', 'rb') as pkl_file_1:
    vec = pickle.load(pkl_file_1)
with open('classif.p', 'rb') as pkl_file:
    classifier = pickle.load(pkl_file)

# Collect the second column of every CSV row except the first one
# (presumably a header row -- confirm against the data file).
with open('test_MLWARE1.csv', 'r') as file_test:
    file_test_read = csv.reader(file_test)
    next(file_test_read, None)  # drop the first row; harmless on an empty file
    basic_test = [each_tweet[1] for each_tweet in file_test_read]

# One feature entry per tweet, vectorised with the loaded vectorizer.
feature_basictest = np.array([
    feature_extract.dialogue_act_features(tweet, topic_mod)
    for tweet in basic_test
])
feature_basictestvec = vec.transform(feature_basictest)

# Predict once and reuse the result for both the console and the file.
predictions = classifier.predict(feature_basictestvec)
print(predictions)
output = list(predictions)

# One prediction per line; the file is flushed and closed on leaving the block.
with open('test', 'w') as file_test_open:
    file_test_open.writelines(
        str(each_output) + '\n' for each_output in output)
# Load the fitted vectorizer, classifier and topic model.
# NOTE(security): pickle can run arbitrary code while loading; only load
# model files that were produced by this project.
# The context managers close each file even if unpickling raises.
with open("vecdict.p", "rb") as vec_file:
    vec = pickle.load(vec_file)
with open("classif.p", "rb") as classifier_file:
    classifier = pickle.load(classifier_file)
with open("topic_mod.p", "rb") as topic_mod_file:
    topic_mod = pickle.load(topic_mod_file)

# BASIC TEST
# Interactive loop: read a text, classify it and print label and score.
while True:
    print("Enter the text to test: ")
    try:
        text = input()
    except EOFError:
        # Input stream exhausted (e.g. piped input or Ctrl-D): stop the
        # script cleanly instead of crashing with a traceback.
        raise SystemExit(0)

    features = np.array(
        [feature_extract.dialogue_act_features(text, topic_mod)])
    feature_vec = vec.transform(features)

    # predict() is given a single sample, so look at its single result.
    ans = classifier.predict(feature_vec)
    if ans[0] == 1:
        print("The text is sarcastic")
    else:
        print("The text is non-sarcastic")
    print("The score of the text is: " +
          str(classifier.decision_function(feature_vec)))