示例#1
0
def tweetscore(sentence):
    """Return the sarcasm score of ``sentence`` as an integer percentage.

    The sentence is converted to features with
    ``feature_extract.dialogue_act_features`` (using the module-level
    ``topic_mod``), vectorised with ``vec`` and scored with
    ``classifier.decision_function``.  The first decision value is squashed
    through a logistic function and rescaled, so the result lies between
    -100 and 100.
    """
    extracted = feature_extract.dialogue_act_features(sentence, topic_mod)
    raw_score = classifier.decision_function(vec.transform(extracted))[0]

    # Logistic squashing gives a value in (0, 1); centre it on zero and
    # stretch it to a signed percentage before rounding.
    squashed = 1.0 / (1.0 + np.exp(-raw_score))
    return int(round(2.0 * (squashed - 0.5) * 100.0))
def tweetscore(sentence):
    """Score ``sentence`` for sarcasm and return an integer percentage.

    Relies on the module-level ``topic_mod``, ``vec`` and ``classifier``.
    The classifier's first decision value is mapped through a logistic
    function and rescaled, giving a rounded value between -100 and 100.
    """
    sentence_features = feature_extract.dialogue_act_features(
        sentence, topic_mod)
    decision = classifier.decision_function(
        vec.transform(sentence_features))[0]

    # 1 / (1 + e^-x) lies in (0, 1); shift by 0.5 and double it to get a
    # signed fraction, then express that as a percentage.
    logistic = 1.0 / (1.0 + np.exp(-decision))
    percentage = int(round(2.0 * (logistic - 0.5) * 100.0))
    return percentage

    
# Training script: fit the topic model, build labelled feature sets for both
# tweet collections, then vectorise the features.
# Uses print() calls so the script also runs on Python 3.
print('Training topics')

# The topic model is fitted on the positive and negative tweets together.
topic_mod = topic.topic(nbtopic=200, alpha='symmetric')
topic_mod.fit(np.concatenate((pos_data, neg_data)))

print('Feature eng')
# label set
cls_set = ['Non-Sarcastic', 'Sarcastic']
featuresets = []

# Each pass appends (features, label) pairs: pos_data is labelled cls_set[1],
# neg_data is labelled cls_set[0].  Progress is printed every 10000 tweets.
for banner, tweets, label in (
        ("Positive tweet processed: ", pos_data, cls_set[1]),
        ("Negative tweet processed: ", neg_data, cls_set[0])):
    index = 0
    for tweet in tweets:
        if index % 10000 == 0:
            print(banner, index)
        featuresets.append(
            (feature_extract.dialogue_act_features(tweet, topic_mod), label))
        index += 1

# After conversion, column 0 holds the features and column 1 the labels.
featuresets = np.array(featuresets)
# Binary targets: 1 for 'Sarcastic', 0 otherwise.
targets = (featuresets[:, 1] == 'Sarcastic').astype(int)

print('Dictionnary vectorizer')
vec = DictVectorizer()
featurevec = vec.fit_transform(featuresets[:, 0])
示例#4
0
from sklearn.metrics import classification_report
from sklearn.feature_extraction import DictVectorizer
import pickle
import feature_extract
import heapq

# Load the two tweet collections, build labelled feature sets and fit a
# DictVectorizer on them.
# Uses print() calls so the script also runs on Python 3.
# NOTE(review): if these .npy files store object arrays, recent NumPy
# releases require np.load(..., allow_pickle=True) -- confirm.
pos_data = np.load('posproc.npy')
neg_data = np.load('negproc.npy')
class_set = ['Non-Sarcastic', 'Sarcastic']
featuresets = []

# Each pass appends (features, label) pairs: pos_data is labelled
# class_set[1], neg_data is labelled class_set[0].  Progress is printed
# every 10000 tweets.
for banner, tweets, label in (
        ("Positive tweet processed: ", pos_data, class_set[1]),
        ("Negative tweet processed: ", neg_data, class_set[0])):
    index = 0
    for tweet in tweets:
        if index % 10000 == 0:
            print(banner, index)
        featuresets.append(
            (feature_extract.dialogue_act_features(tweet), label))
        index += 1

# After conversion, column 0 holds the features and column 1 the labels.
featuresets = np.array(featuresets)
# Binary targets: 1 for 'Sarcastic', 0 otherwise.
targets = (featuresets[:, 1] == 'Sarcastic').astype(int)
vec = DictVectorizer()
featurevec = vec.fit_transform(featuresets[:, 0])

# Presumably the pickle path for the fitted vectorizer; the code that uses
# it is outside this view.
file_Name = "vecdict.p"
示例#5
0
# Training script: fit the topic model, build labelled feature sets for both
# tweet collections, then create the vectorizer.
print('Training topics')

# The topic model is fitted on the positive and negative tweets together.
topic_mod = topic.topic(nbtopic=200, alpha='symmetric')
topic_mod.fit(np.concatenate((pos_data, neg_data)))

print('Feature eng')
# label set
cls_set = ['Non-Sarcastic', 'Sarcastic']
featuresets = []

# Each pass appends (features, label) pairs: pos_data is labelled cls_set[1],
# neg_data is labelled cls_set[0].  Progress is printed every 10000 tweets.
for banner, tweets, label in (
        ("Positive tweet processed: ", pos_data, cls_set[1]),
        ("Negative tweet processed: ", neg_data, cls_set[0])):
    index = 0
    for tweet in tweets:
        if index % 10000 == 0:
            print(banner, index)
        featuresets.append(
            (feature_extract.dialogue_act_features(tweet, topic_mod), label))
        index += 1

# After conversion, column 1 holds the label strings.
featuresets = np.array(featuresets)
# Binary targets: 1 for 'Sarcastic', 0 otherwise.
targets = (featuresets[:, 1] == 'Sarcastic').astype(int)

print('Dictionnary vectorizer')
vec = DictVectorizer()
示例#6
0
# Training script: fit the topic model, build labelled feature sets for both
# tweet collections, then vectorise the features.
# Uses print() calls so the script also runs on Python 3.
print('Training topics')

# The topic model is fitted on the positive and negative tweets together.
topic_mod = topic.topic(nbtopic=200, alpha='symmetric')
topic_mod.fit(np.concatenate((pos_data, neg_data)))

print('Feature eng')
# label set
cls_set = ['Non-Sarcastic', 'Sarcastic']
featuresets = []

# Each pass appends (features, label) pairs: pos_data is labelled cls_set[1],
# neg_data is labelled cls_set[0].  Progress is printed every 10000 tweets.
for banner, tweets, label in (
        ("Positive tweet processed: ", pos_data, cls_set[1]),
        ("Negative tweet processed: ", neg_data, cls_set[0])):
    index = 0
    for tweet in tweets:
        if index % 10000 == 0:
            print(banner, index)
        featuresets.append(
            (feature_extract.dialogue_act_features(tweet, topic_mod), label))
        index += 1

# After conversion, column 0 holds the features and column 1 the labels.
featuresets = np.array(featuresets)
# Binary targets: 1 for 'Sarcastic', 0 otherwise.
targets = (featuresets[:, 1] == 'Sarcastic').astype(int)

print('Dictionnary vectorizer')
vec = DictVectorizer()
featurevec = vec.fit_transform(featuresets[:, 0])
# Load the pickled vectorizer and classifier, classify the tweets from the
# test CSV and write one prediction per line to the file 'test'.
print('Before Pickling')

# NOTE(review): pickle.load can execute arbitrary code; only load files from
# a trusted source.  Pickles must be opened in binary mode, and the context
# managers ensure every handle is closed even if loading fails.
with open('vecdict.p', 'rb') as pkl_file_1:
    vec = pickle.load(pkl_file_1)
with open('classif.p', 'rb') as pkl_file:
    classifier = pickle.load(pkl_file)

# newline='' is what the csv module requires so that line breaks embedded
# in quoted fields are parsed correctly.
with open('test_MLWARE1.csv', 'r', newline='') as file_test:
    file_test_read = csv.reader(file_test)
    next(file_test_read, None)  # the first row is skipped (header)
    # The tweet text is taken from the second column of each remaining row.
    basic_test = [each_tweet[1] for each_tweet in file_test_read]

feature_basictest = np.array([
    feature_extract.dialogue_act_features(tweet, topic_mod)
    for tweet in basic_test
])
feature_basictestvec = vec.transform(feature_basictest)

# Predict once and reuse the result for both the console and the file.
predictions = classifier.predict(feature_basictestvec)
print(predictions)

output = list(predictions)
with open('test', 'w') as file_test_open:
    file_test_open.writelines(
        str(each_output) + '\n' for each_output in output)
示例#8
0
# Load the pickled vectorizer, classifier and topic model.
# Context managers close each file even when pickle.load raises.
# NOTE(review): pickle.load can execute arbitrary code; only load files from
# a trusted source.
with open("vecdict.p", "rb") as vec_file:
    vec = pickle.load(vec_file)

with open("classif.p", "rb") as classifier_file:
    classifier = pickle.load(classifier_file)

with open("topic_mod.p", "rb") as topic_mod_file:
    topic_mod = pickle.load(topic_mod_file)

# BASIC TEST
# Interactive loop: read a line of text, classify it with the loaded models
# and print both the verdict and the raw decision score.  The loop has no
# exit condition of its own.
while True:
    print("Enter the text to test: ")
    text = input()

    # The vectorizer is given a one-element array holding this text's
    # features.
    features = np.array(
        [feature_extract.dialogue_act_features(text, topic_mod)])
    feature_vec = vec.transform(features)

    ans = classifier.predict(feature_vec)
    verdict = ("The text is sarcastic" if ans == 1
               else "The text is non-sarcastic")
    print(verdict)

    score = classifier.decision_function(feature_vec)
    print("The score of the text is: " + str(score))