# implementSVMs.py
#
# Classifies every review returned by parse.testingData() with
# SVM.getTrueOrDeceptive and writes the answers, one per line, to text files
# under svm1Tests/, svm2Tests/ and ValidationAnswers/.
import SVM
import parse
# Load the reviews to classify. The loop further down iterates `test` and
# reads the keys 'char', 'uni', 'big', 'pos' and 'IsTrue' from each element.
test= parse.testingData()
# Unpack the eight count tables produced by the parser. Every table is indexed
# by feature type, and the values of the inner mapping are what
# getTotalCountFeature adds up.
# NOTE(review): judging by the names, these are the true/deceptive and
# positive/negative buckets plus their four combinations -- confirm against
# parse.svm_buckets.
(trueTable, decTable, truePosTable, trueNegTable, decPosTable, decNegTable, posTable, negTable)= parse.svm_buckets()
# Each review is then run through the classifiers and the answers are written
# to text files.
# Debug aid: print test[0]['uni']
def getTotalCountFeature(type1, table):
    """Return the sum of every value stored under ``table[type1]``.

    Args:
        type1: Feature-type key to look up in ``table`` (this script uses
            'char', 'uni', 'big' and 'pos').
        table: Mapping from feature type to an inner mapping whose values
            are numbers.

    Returns:
        The total of the inner mapping's values; 0 when it is empty.

    Raises:
        KeyError: If ``type1`` is not a key of ``table``.
    """
    return sum(table[type1].values())
# Feature families that are scored independently. Each one gets its own
# answer file under svm1Tests/ and svm2Tests/, named after the feature type.
featureTypes = ('char', 'uni', 'big', 'pos')


def makeClassifier(firstTable, secondTable):
    """Return ``classify(featureType, review)`` bound to one pair of tables.

    The per-feature-type totals of both tables are computed once, here, so
    the per-review loop never has to re-sum them. The returned function
    forwards to SVM.getTrueOrDeceptive with the feature type, the two
    totals, the review's features of that type, and the two tables.
    """
    totals = {}
    for featureType in featureTypes:
        totals[featureType] = (getTotalCountFeature(featureType, firstTable),
                               getTotalCountFeature(featureType, secondTable))

    def classify(featureType, review):
        numFirst, numSecond = totals[featureType]
        return SVM.getTrueOrDeceptive(featureType, numFirst, numSecond,
                                      review[featureType],
                                      firstTable, secondTable)
    return classify


def writeAnswers(path, answers):
    """Write one answer per line to ``path``, with no newline after the last.

    The file is opened in a ``with`` block so it is flushed and closed even
    if writing fails.
    """
    with open(path, "w") as answerFile:
        answerFile.write("\n".join(str(answer) for answer in answers))


# svm1: one direct decision per feature type using the true/deceptive tables.
classifyTruth = makeClassifier(trueTable, decTable)
# svm2: first a pos/neg decision, then a true/deceptive decision using the
# tables that match the outcome of the first stage.
classifyPolarity = makeClassifier(posTable, negTable)
classifyTruthGivenPos = makeClassifier(truePosTable, decPosTable)
classifyTruthGivenNeg = makeClassifier(trueNegTable, decNegTable)

svm1Answers = dict((featureType, []) for featureType in featureTypes)
svm2Answers = dict((featureType, []) for featureType in featureTypes)
trueAnswers = []

for te in test:
    # Stage one of svm2 for every feature type.
    polarity = [classifyPolarity(featureType, te) for featureType in featureTypes]

    # Stage two of svm2: a stage-one result of 1 selects the tables
    # conditioned on the positive bucket, anything else the negative ones.
    for featureType, posOrNeg in zip(featureTypes, polarity):
        if posOrNeg == 1:
            secondStage = classifyTruthGivenPos
        else:
            secondStage = classifyTruthGivenNeg
        svm2Answers[featureType].append(secondStage(featureType, te))

    # svm1 decision for every feature type.
    for featureType in featureTypes:
        svm1Answers[featureType].append(classifyTruth(featureType, te))

    # Expected label, written out so the answer files can be compared to it.
    trueAnswers.append(te["IsTrue"])

# Answers are joined with "\n" rather than detecting the final review inside
# the loop: an equality test against test[-1] also matches any earlier review
# that happens to equal the last one and would drop its line break.
for featureType in featureTypes:
    writeAnswers('svm1Tests/%s.txt' % featureType, svm1Answers[featureType])
    writeAnswers('svm2Tests/%s.txt' % featureType, svm2Answers[featureType])
writeAnswers('ValidationAnswers/T:D.txt', trueAnswers)

print("Done evaluating")
# For the validation set, the true answers are written to a text file at the same time, so that accuracy percentages can be calculated afterwards.