testFilename, testStartColIdx, typesDictBigrams) XTestTrigramFeatures = extractTrigramFeatureVecX.extractTrigramFeatureVecX( testFilename, testStartColIdx, typesDictTrigrams) #combining the two feature vectors below XTestFeatures = [] Ndata = len(XTestUnigramFeatures) #number of sentences in training data lUTypes = len(XTestUnigramFeatures[0]) #number of unigram types lBTypes = len(XTestBigramFeatures[0]) #number of unigram types lTTypes = len(XTestTrigramFeatures[0]) #number of unigram types for i in range(0, Ndata): XTestFeatures.append(XTestUnigramFeatures[i]) for j in range(0, lBTypes): XTestFeatures[i].append(XTestBigramFeatures[i][j]) for k in range(0, lTTypes): XTestFeatures[i].append(XTestTrigramFeatures[i][k]) #combining of feature vectors finished #print "XTEST Feature Vector of size ", len(XTestFeatures), " extracted"; #Using Trained SVM to classify data predictedLabels = clf.predict(XTestFeatures) #Writing predicted labels to file writeToFile = "TriBiUnigramSVMTest_C2000.0.txt" handleClassLabels.labelsToFile(predictedLabels, writeToFile) print "SUCCESS!"
#### CAN BE INEFFICIENT! CAN MAKE PREDICTIONS LINE BY LINE, IF WE FACE ISSUES
# Test-time pipeline: extract unigram and trigram feature vectors for the test
# file, join them into one vector per sentence, classify them with the
# already-trained classifier `clf`, and write the predicted labels to disk.

# Extract feature vectors from test data
testFilename = "../rawdata/test/AB_SemEval2013_task2_test_fixed/input/twitter-test-input-B_500.tsv"
testStartColIdx = 3

# Single-string print(...) emits the same text under Python 2's print
# statement and Python 3's print function.
print("XTEST Feature Vector Extraction Started")

XTestUnigramFeatures = extractFeatureVecX.extractFeatureVecX(
    testFilename, testStartColIdx, typesDictUnigrams)
XTestTrigramFeatures = extractTrigramFeatureVecX.extractTrigramFeatureVecX(
    testFilename, testStartColIdx, typesDictTrigrams)

# Combining the two feature vectors below
Ndata = len(XTestUnigramFeatures)        # number of sentences in test data
lUTypes = len(XTestUnigramFeatures[0])   # number of unigram types
lTTypes = len(XTestTrigramFeatures[0])   # number of trigram types

# Build a NEW row per sentence (unigram features followed by the first
# lTTypes trigram features). The previous loop appended the unigram row
# object itself and then extended it in place, which silently rewrote
# XTestUnigramFeatures into the combined vectors as a side effect.
XTestFeatures = [
    list(XTestUnigramFeatures[i]) + list(XTestTrigramFeatures[i][:lTTypes])
    for i in range(Ndata)
]
# Combining of feature vectors finished
#print "XTEST Feature Vector of size ", len(XTestFeatures), " extracted";

# Using trained SVM to classify data
predictedLabels = clf.predict(XTestFeatures)

# Writing predicted labels to file
writeToFile = "TrigramUnigramSVMIgnoreUNK-B.txt"
handleClassLabels.labelsToFile(predictedLabels, writeToFile)
print("SUCCESS!")