Exemplo n.º 1
0
    testFilename, testStartColIdx, typesDictBigrams)
XTestTrigramFeatures = extractTrigramFeatureVecX.extractTrigramFeatureVecX(
    testFilename, testStartColIdx, typesDictTrigrams)

# Combine the unigram, bigram and trigram feature vectors of every test
# sentence into one row: unigram features first, then bigram, then trigram.
# Each row is built as a NEW list so the rows of XTestUnigramFeatures are not
# extended in place (the previous loop appended to the very same list objects).
# All three lists are extracted from the same test file, so they are expected
# to have one row per sentence each.
XTestFeatures = [
    list(unigramRow) + list(bigramRow) + list(trigramRow)
    for unigramRow, bigramRow, trigramRow in zip(
        XTestUnigramFeatures, XTestBigramFeatures, XTestTrigramFeatures)
]

# Classify the test data with the previously trained classifier
# (an SVM, per the original comments and the output file name).
predictedLabels = clf.predict(XTestFeatures)

# Write the predicted labels to file
writeToFile = "TriBiUnigramSVMTest_C2000.0.txt"
handleClassLabels.labelsToFile(predictedLabels, writeToFile)

# Parenthesised single-argument form prints the same text on Python 2 and 3.
print("SUCCESS!")


# NOTE: this can be inefficient; predictions can be made line by line instead
# if we face issues.
# Extract feature vectors from the test data
testFilename = "../rawdata/test/AB_SemEval2013_task2_test_fixed/input/twitter-test-input-B_500.tsv"
testStartColIdx = 3
# Parenthesised single-argument form prints the same text on Python 2 and 3.
print("XTEST Feature Vector Extraction Started")
XTestUnigramFeatures = extractFeatureVecX.extractFeatureVecX(
    testFilename, testStartColIdx, typesDictUnigrams)
XTestTrigramFeatures = extractTrigramFeatureVecX.extractTrigramFeatureVecX(
    testFilename, testStartColIdx, typesDictTrigrams)

# Combine the unigram and trigram feature vectors of every test sentence into
# one row: unigram features first, then trigram features.
# Each row is built as a NEW list so the rows of XTestUnigramFeatures are not
# extended in place (the previous loop appended to the very same list objects).
# Both lists are extracted from the same test file, so they are expected to
# have one row per sentence each.
XTestFeatures = [
    list(unigramRow) + list(trigramRow)
    for unigramRow, trigramRow in zip(
        XTestUnigramFeatures, XTestTrigramFeatures)
]

# Classify the test data with the previously trained classifier
# (an SVM, per the original comments and the output file name).
predictedLabels = clf.predict(XTestFeatures)

# Write the predicted labels to file
writeToFile = "TrigramUnigramSVMIgnoreUNK-B.txt"
handleClassLabels.labelsToFile(predictedLabels, writeToFile)


print("SUCCESS!")