示例#1
0
 def test_model(texts, classes, model, folds=5):
     '''
         Cross-validate a single model and return its mean AUC.

         texts, classes -- parallel sequences; both are converted to numpy
                           arrays so they can be indexed with the fold
                           index arrays.
         model          -- a (constructor, params) pair. The constructor is
                           called as constructor(train_texts, train_classes,
                           *params) and the object it returns must provide
                           classify(texts).
         folds          -- fold count handed to StratifiedKFold; also the
                           divisor used to average the per-fold AUC values.
     '''
     factory, extra_args = model
     labels = np.array(classes)
     documents = np.array(texts)

     total_auc = 0
     for train_idx, test_idx in cross_validation.StratifiedKFold(labels, folds):
         # Fit on the training part of the fold, score the held-out part.
         fitted = factory(documents[train_idx], labels[train_idx], *extra_args)
         scores = fitted.classify(documents[test_idx])
         total_auc += calculate_auc(labels[test_idx], scores)

     return total_auc / folds
示例#2
0
 def test(texts, classes, models, nn_params, folds=4):
     '''
         Evaluate an Ensemble with stratified k-fold cross-validation and
         return the mean AUC over the folds.

         texts, classes    -- parallel sequences; both are converted to numpy
                              arrays so they can be indexed with the fold
                              index arrays.
         models, nn_params -- forwarded unchanged to
                              Ensemble(train_texts, train_classes,
                                       nn_params, models).
         folds             -- fold count handed to StratifiedKFold; also the
                              divisor used to average the per-fold AUC.

         For every fold the ensemble is rebuilt on the training part,
         negative predictions are clamped to 0, and the fold's AUC is
         printed before being added to the running total.
     '''
     classes = np.array(classes)
     texts = np.array(texts)

     auc_sum = 0

     for train_idx, test_idx in cross_validation.StratifiedKFold(classes, folds):
         n = Ensemble(texts[train_idx], classes[train_idx], nn_params, models)
         predictions = n.classify(texts[test_idx])
         # Clamp negative scores to 0 before computing the AUC.
         predictions[predictions < 0] = 0

         auc = calculate_auc(classes[test_idx], predictions)
         # A single-argument print(...) prints the same thing on Python 2 and 3.
         print(auc)
         auc_sum += auc

     return auc_sum / folds
示例#3
0
# Evaluate the classifier on the verification dataset.
# Column 2 of each row is the text, column 0 is its integer class.
texts = []
classes = []
# The with-block closes the file as soon as every row has been read
# (the original left the handle open for the rest of the run).
with open('./dataset/test_with_solutions.csv', 'rb') as solutions_file:
    csvr = csv.reader(solutions_file, delimiter=',', quotechar='"')
    next(csvr)  # discard the first row (presumably the header -- confirm)
    for row in csvr:
        texts.append(row[2].decode('utf8'))
        classes.append(int(row[0]))
results = n.classify(texts)

# Clamp the scores into [0, 1] before computing the AUC.
results[results < 0] = 0
results[results > 1] = 1
# repr(...) is the portable spelling of the Python 2 backtick operator.
print(sys.argv[1] + " --- " + repr(calculate_auc(classes, results)))
end = time.time()
# print "classification time="
# print end-start
# writer = open('rez.csv', 'w')
# for r in results:
#     writer.write('%s\n' % r)
# writer.close()

# wrongs = []
# for i in range(len(texts)):
#     if abs(classes[i] - results[i]) > 0.5:
#         wrongs.append((classes[i], results[i], texts[i]))

# import csv
# writer = open('wrongs.csv', 'w')
示例#4
0
m2 = Dictionary(texts, classes)

# Replace texts/classes with the rows of the labelled solutions file.
# Column 2 of each row is the text, column 0 is its integer class.
texts = []
classes = []
# The with-block closes the file as soon as every row has been read
# (the original left the handle open for the rest of the run).
with open('test_with_solutions.csv', 'rb') as solutions_file:
    csvr = csv.reader(solutions_file, delimiter=',', quotechar='"')
    next(csvr)  # discard the first row (presumably the header -- confirm)
    for row in csvr:
        texts.append(row[2].decode('utf8'))
        classes.append(int(row[0]))
#results = n.classify(texts)
#results[results<0] = 0
#print calculate_auc(classes, results)

# Score each model on its own, then a weighted blend of the two.
# Single-argument print(...) prints the same thing on Python 2 and 3.
r1 = m1.classify(texts)
print(calculate_auc(classes, r1))
r2 = np.array(m2.classify(texts))
print(calculate_auc(classes, r2))
# Blend with weights 1.2 / 0.8, halve, and clamp the result into [0, 1].
r = (1.2 * r1 + 0.8 * r2) / 2
r[r > 1] = 1
r[r < 0] = 0
print(calculate_auc(classes, r))

#print TestSVM.test_model(texts, classes, models[-1])
#print TestSVM.test(texts, classes, models, nn_params)
# The ensemble is built from the texts/classes that were just loaded.
n = Ensemble(texts, classes, nn_params, models)

# Start over with an empty text list and open test.csv for reading.
texts = []
# NOTE(review): the file object is created inline and never bound to a name,
# so it cannot be closed explicitly from here.
csvr = csv.reader(open('test.csv', 'rb'), delimiter=',', quotechar='"')
# Consume the first row before iterating (presumably the header -- confirm).
csvr.next()
for row in csvr:
示例#5
0
 
m1 = ChSVM(texts, classes)
m2 = Dictionary(texts, classes)

# Replace texts/classes with the rows of the labelled solutions file.
# Column 2 of each row is the text, column 0 is its integer class.
texts = []
classes = []
# The with-block closes the file as soon as every row has been read
# (the original left the handle open for the rest of the run).
with open('test_with_solutions.csv', 'rb') as solutions_file:
    csvr = csv.reader(solutions_file, delimiter=',', quotechar='"')
    next(csvr)  # discard the first row (presumably the header -- confirm)
    for row in csvr:
        texts.append(row[2].decode('utf8'))
        classes.append(int(row[0]))
#results = n.classify(texts)
#results[results<0] = 0
#print calculate_auc(classes, results)

# Score each model on its own, then a weighted blend of the two.
# Single-argument print(...) prints the same thing on Python 2 and 3.
r1 = m1.classify(texts)
print(calculate_auc(classes, r1))
r2 = np.array(m2.classify(texts))
print(calculate_auc(classes, r2))
# Blend with weights 1.2 / 0.8, halve, and clamp the result into [0, 1].
r = (1.2 * r1 + 0.8 * r2) / 2
r[r > 1] = 1
r[r < 0] = 0
print(calculate_auc(classes, r))

#print TestSVM.test_model(texts, classes, models[-1])
#print TestSVM.test(texts, classes, models, nn_params)
# The ensemble is built from the texts/classes that were just loaded.
n = Ensemble(texts, classes, nn_params, models)



# Start over with an empty text list and open test.csv for reading.
texts = []
# NOTE(review): the file object is created inline and never bound to a name,
# so it cannot be closed explicitly from here.
csvr = csv.reader(open('test.csv', 'rb'), delimiter=',', quotechar='"')
示例#6
0
start = time.time()

# Evaluate the classifier on the verification dataset.
# Column 2 of each row is the text, column 0 is its integer class.
texts = []
classes = []
# The with-block closes the file as soon as every row has been read
# (the original left the handle open for the rest of the run).
with open('./dataset/test_with_solutions.csv', 'rb') as solutions_file:
    csvr = csv.reader(solutions_file, delimiter=',', quotechar='"')
    next(csvr)  # discard the first row (presumably the header -- confirm)
    for row in csvr:
        texts.append(row[2].decode('utf8'))
        classes.append(int(row[0]))
results = n.classify(texts)

# Clamp the scores into [0, 1] before computing the AUC.
results[results < 0] = 0
results[results > 1] = 1
# repr(...) is the portable spelling of the Python 2 backtick operator.
print(sys.argv[1] + " --- " + repr(calculate_auc(classes, results)))
end = time.time()
# print "classification time="
# print end-start
# writer = open('rez.csv', 'w')
# for r in results:
#     writer.write('%s\n' % r)
# writer.close()




# wrongs = []
# for i in range(len(texts)):
#     if abs(classes[i] - results[i]) > 0.5:
#         wrongs.append((classes[i], results[i], texts[i]))