/
svm.py
87 lines (71 loc) · 1.19 KB
/
svm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import math
import svm
def mult(a, b):
    """Return the dot product of the sequences *a* and *b*.

    Iterates over the positions of *a*; any elements of *b* beyond
    ``len(a)`` are ignored, and a *b* shorter than *a* raises IndexError.
    An empty *a* yields 0.
    """
    # Index into b (rather than zip) so a too-short b still raises
    # IndexError instead of being silently truncated.
    return sum(value * b[i] for i, value in enumerate(a))
def add(a, b):
    """Add *b* element-wise into *a* and return *a*.

    NOTE: *a* is modified in place; the returned object is the same list
    that was passed in, not a copy. Elements of *b* beyond ``len(a)`` are
    ignored, and a *b* shorter than *a* raises IndexError.
    """
    for i, value in enumerate(a):
        a[i] = value + b[i]
    return a
def norm(a):
    """Return the Euclidean (L2) norm of the sequence *a* as a float.

    An empty sequence has norm 0.0.
    """
    # Uses the builtin sum() directly instead of a local accumulator that
    # shadowed it.
    return math.sqrt(sum(value * value for value in a))
def _parse_email(line):
    """Turn one space-separated data line into ``(label, counts)``.

    Field 0 is ignored. Field 1 is the class name: ``'ham'`` maps to -1 and
    anything else maps to 1. The remaining fields are read as alternating
    ``word count`` pairs, and only the integer counts are kept, in order.
    """
    fields = line.split(' ')
    target = -1 if fields[1] == 'ham' else 1
    # Words sit at even positions starting at 2 and their counts directly
    # after them. Stopping at len(fields) - 1 guarantees fields[i + 1]
    # exists, so a dangling final token (e.g. a lone newline) is skipped.
    counts = [int(fields[i + 1]) for i in range(2, len(fields) - 1, 2)]
    return target, counts


def _load_emails(path, limit=None):
    """Read a data file into ``(labels, features)`` dicts keyed from 1.

    ``labels[n]`` is -1 or 1 and ``features[n]`` is the list of counts for
    the n-th line of the file. When *limit* is given, reading stops after
    that many lines.
    """
    labels = {}
    features = {}
    # The context manager closes the file even if a line fails to parse.
    with open(path, 'r') as handle:
        for number, line in enumerate(handle, start=1):
            labels[number], features[number] = _parse_email(line)
            if limit is not None and number >= limit:
                break
    return labels, features


# NOTE(review): `svm` is whatever the file-level `import svm` resolves to.
# This script itself appears to be named svm.py -- confirm the import picks
# up the intended SVM library and not this file.

# Maximum number of training lines to read.
Total = 1000
label, x = _load_emails('../data/train', limit=Total)

# Longest training feature list. Not used below; kept because it was a
# module-level name in the original script.
max_len = 0
for counts in x.values():
    max_len = max(max_len, len(counts))

# Pass `label` (the original passed the undefined name `lebel`, which raised
# NameError before training could start).
# presumably '-t 0' is a LIBSVM-style option string -- confirm against the
# svm module in use.
trained = svm.train(label, x, '-t 0')

# The test file is read in full (no line limit).
label_test, x_test = _load_emails('../data/test')
[prediction, accuracy, values] = svm.predict(label_test, x_test, trained)