Пример #1
0
# Pull the test features and labels out of the already-loaded test_data.
X_test = test_data["Xtest"]
y_test = test_data["ytest"].flatten()
# Float label vector for the test set: -1 where y_test is 0, +1 elsewhere.
yy_test = np.where(y_test == 0, -1.0, 1.0)
print("Done!")
sys.stdout.flush()

#############################################################################
# your code for setting up the best SVM classifier for this dataset         #
# Design the training parameters for the SVM.                               #
# What should the learning_rate be? What should C be?                       #
# What should num_iters be? Should X be scaled? Should X be kernelized?     #
#############################################################################
# your experiments below

# Fresh two-class linear SVM whose parameter vector starts at zero, with one
# entry per column of X.
svm = LinearSVM_twoclass()
svm.theta = np.zeros(X.shape[1])

# Iteration count and learning rate; both are echoed to stdout.
it = 2000
lr = 1e-4
print(" ".join([str(it), " iters"]))
print(" ".join(["learning rate ", str(lr)]))

# Candidate values for C and for sigma (the two grids are identical).
# Alternative grids kept from the original comments:
#   [0.01]  and  [0.01,0.03,0.1,0.3,1,3,10,30]
Cvals = [0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100]
sigma_vals = list(Cvals)

# Currently selected C and sigma.
best_C = 0.05
best_sigma = 1
Пример #2
0
############################################################################
#  Part  2: Training linear SVM                                            #
#  We train a linear SVM on the data set and then plot the learned         #
#  decision boundary                                                       #
############################################################################

############################################################################
# TODO                                                                     #
# You will change this line to vary C.                                     #
############################################################################

C = 1

############################################################################

# Fresh classifier whose parameter vector starts at zero, with one entry per
# column of XX.
svm = LinearSVM_twoclass()
svm.theta = np.zeros(XX.shape[1])
svm.train(XX, yy, C=C, learning_rate=1e-4, num_iters=50000, verbose=True)

# classify the training data and report the accuracy against yy

y_pred = svm.predict(XX)
train_accuracy = metrics.accuracy_score(yy, y_pred)
print(" ".join(["Accuracy on training data = ", str(train_accuracy)]))

# visualize the decision boundary and save the figure

class_labels = ['neg', 'pos']
utils.plot_decision_boundary(scaleX, y, svm, 'x1', 'x2', class_labels)
plt.savefig('fig2.pdf')

############################################################################
Пример #3
0
############################################################################
#  Part  2: Training linear SVM                                            #
#  We train a linear SVM on the data set and then plot the learned         #
#  decision boundary                                                       #
############################################################################

############################################################################
# TODO                                                                     #
# You will change this line to vary C.                                     #
############################################################################

C = 100.0

############################################################################

# Build the classifier and zero-initialise its parameters (one per column
# of XX) before running the training loop.
svm = LinearSVM_twoclass()
svm.theta = np.zeros(XX.shape[1])
svm.train(
    XX,
    yy,
    learning_rate=1e-4,
    C=C,
    num_iters=50000,
    verbose=True,
)

# classify the training data

y_pred = svm.predict(XX)

accuracy_on_train = metrics.accuracy_score(yy, y_pred)
print(" ".join(["Accuracy on training data = ", str(accuracy_on_train)]))

# visualize the decision boundary

utils.plot_decision_boundary(
    scaleX, y, svm, 'x1', 'x2', ['neg', 'pos'])
plt.savefig('fig2.pdf')

############################################################################
Пример #4
0
# your code for setting up the best SVM classifier for this dataset         #
# Design the training parameters for the SVM.                               #
# What should the learning_rate be? What should C be?                       #
# What should num_iters be? Should X be scaled? Should X be kernelized?     #
#############################################################################
# your experiments below

# Split X / y into a training part and a validation part with a fixed seed.
# NOTE(review): test_size=0.8 sends 80% of the rows to X_val / y_val and
# leaves only 20% in X_train / y_train -- confirm this is intended.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.8, random_state=42)

print(X_train.shape)
print(X_val.shape)

# Classifier with a zero parameter vector, one entry per column of X.
svm = LinearSVM_twoclass()
svm.theta = np.zeros(X.shape[1])

# first select the best gaussian kernelized svm

# Candidate grids: C and sigma share the same values.
Cvals = [0.01, 0.03, 0.1, 0.3, 10, 30]
sigma_vals = list(Cvals)
learning_rates = [1e-4, 1e-3, 1e-2, 1e-1, 1]
iterations = [100, 1000, 10000]

# Running best accuracy, starting at zero.
best_acc = 0
for sigma_val in sigma_vals:
    K = np.array([
        utils.gaussian_kernel(x1, x2, sigma_val) for x1 in X_train
        for x2 in X_train
Пример #5
0
# your code for setting up the best SVM classifier for this dataset         #
# Design the training parameters for the SVM.                               #
# What should the learning_rate be? What should C be?                       #
# What should num_iters be? Should X be scaled? Should X be kernelized?     #
#############################################################################
# your experiments below

# NOTE(review): everything in this section except the single
# `svm = LinearSVM_twoclass()` statement is commented out. The disabled lines
# sketch a train/validation split followed by a nested loop over C, learning
# rate and iteration count; none of it runs as written.
# from sklearn import cross_validation
# XX, XXval, yy, yyval = cross_validation.train_test_split(X, yy, test_size=0.2)

# Disabled candidate grids for C, learning rate and iteration count.
# Cvals = [0.1, 0.3, 1, 3, 10, 30]
# lr_vals = [1e-2, 3e-2, 1e-1, 3e-1, 1, 3]
# iter_vals = [10000]#[1000, 5000, 10000, 25000]

# best_acc = 0
# Only live statement here: construct the classifier and bind it to `svm`.
svm = LinearSVM_twoclass()
# Disabled: standard-scale X and stack a column of ones in front of it.
# # scaler = preprocessing.StandardScaler().fit(X)
# # scaleX = scaler.transform(X)
# # XX = np.vstack([np.ones((scaleX.shape[0],)), scaleX.T]).T

# Disabled: the same scaling applied to XVal with a separately fitted scaler.
# # scalerval = preprocessing.StandardScaler().fit(XVal)
# # scaleXval = scalerval.transform(XVal)
# # XXval = np.vstack([np.ones((scaleXval.shape[0],)), scaleXval.T]).T

# Disabled: retrain from a zero theta for every (C, lr, it) combination.
# for C in Cvals:
#     for lr in lr_vals:
#         for it in iter_vals:
#             print C, lr, it,
#             svm = LinearSVM_twoclass()
#             svm.theta = np.zeros((XX.shape[1],))
#             svm.train(XX, yy, learning_rate=lr, C=C, num_iters=it, verbose=False)
Пример #6
0
# Flattened test labels taken from the loaded test_data.
y_test = test_data['ytest'].flatten()

#############################################################################
# your code for setting up the best SVM classifier for this dataset         #
# Design the training parameters for the SVM.                               #
# What should the learning_rate be? What should C be?                       #
# What should num_iters be? Should X be scaled? Should X be kernelized?     #
#############################################################################
# your experiments below

# Seeded train/validation split of X and y.
# NOTE(review): test_size=0.8 places 80% of the rows in the validation part
# and only 20% in the training part -- confirm this is intended.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.8,
    random_state=42,
)

print(X_train.shape)
print(X_val.shape)

# Classifier starting from an all-zero theta sized by the columns of X.
svm = LinearSVM_twoclass()
svm.theta = np.zeros(X.shape[1])

# first select the best gaussian kernelized svm

# Candidate grids for the experiments; C and sigma use the same values.
Cvals = [0.01, 0.03, 0.1, 0.3, 10, 30]
sigma_vals = list(Cvals)
learning_rates = [1e-4, 1e-3, 1e-2, 1e-1, 1]
iterations = [100, 1000, 10000]

# Running best accuracy, starting at zero.
best_acc = 0
for sigma_val in sigma_vals:
    K = np.array([utils.gaussian_kernel(x1,x2,sigma_val) for x1 in X_train for x2 in X_train]).reshape(X_train.shape[0],X_train.shape[0])
    scaler = preprocessing.StandardScaler().fit(K)
Пример #7
0
# Float label vector: -1 where y is 0, +1 everywhere else.
yy = np.where(y == 0, -1.0, 1.0)

# Load the test set and pull out its feature matrix and flattened labels.
test_data = scipy.io.loadmat('data/spamTest.mat')
X_test = test_data['Xtest']
y_test = test_data['ytest'].flatten()

#############################################################################
# your code for setting up the best SVM classifier for this dataset         #
# Design the training parameters for the SVM.                               #
# What should the learning_rate be? What should C be?                       #
# What should num_iters be? Should X be scaled? Should X be kernelized?     #
#############################################################################
# your experiments below

# Classifier with a zero-initialised theta, one entry per column of X.
svm = LinearSVM_twoclass()
svm.theta = np.zeros(X.shape[1])

# Train on the full X with the +/-1 labels.
C = 1
svm.train(
    X,
    yy,
    C=C,
    learning_rate=1e-1,
    num_iters=8000,
    verbose=True,
)

#############################################################################
#  end of your code                                                         #
#############################################################################

#############################################################################
# what is the accuracy of the best model on the training data itself?       #
#############################################################################
# 2 lines of code expected
Пример #8
0
# Training data, with labels re-encoded as floats: -1 where y is 0, +1
# elsewhere.
X, y = utils.load_mat("data/spamTrain.mat")
yy = np.where(y == 0, -1.0, 1.0)

# Test data: feature matrix plus flattened label vector.
test_data = scipy.io.loadmat("data/spamTest.mat")
X_test = test_data["Xtest"]
y_test = test_data["ytest"].flatten()

#############################################################################
# your code for setting up the best SVM classifier for this dataset         #
# Design the training parameters for the SVM.                               #
# What should the learning_rate be? What should C be?                       #
# What should num_iters be? Should X be scaled? Should X be kernelized?     #
#############################################################################
# your experiments below

# Classifier with an all-zero theta sized by the number of columns of X.
svm = LinearSVM_twoclass()
svm.theta = np.zeros(X.shape[1])
# Experiment to find the best C, learning rate and number of iterations (no kernel)

# X_train, X_val, y_train, y_val = train_test_split(X, yy, test_size=0.2)
# iters=list(np.array(range(801))*5)
# trace={}
# for lr in [0.01,0.05,0.1,0.5]:
# 	trace[lr]=[]
# 	svm.theta = np.zeros((X.shape[1],))
# 	trace[lr].append(metrics.accuracy_score(y_val,svm.predict(X_val)))
# 	for i in range(0,800):
# 		svm.train(X_train,y_train,learning_rate=lr,C=0.1,num_iters=5,verbose=False)
# 		a=metrics.accuracy_score(y_val,svm.predict(X_val))
# 		trace[lr].append(a)
# 		print("%.2f,%d,%f\d"%(lr,i,a))
Пример #9
0
# Load the test set; keep its feature matrix and flattened labels.
test_data = scipy.io.loadmat('data/spamTest.mat')
X_test = test_data['Xtest']
y_test = test_data['ytest'].flatten()
print("Done!")
sys.stdout.flush()

#############################################################################
# your code for setting up the best SVM classifier for this dataset         #
# Design the training parameters for the SVM.                               #
# What should the learning_rate be? What should C be?                       #
# What should num_iters be? Should X be scaled? Should X be kernelized?     #
#############################################################################
# your experiments below

# Classifier with a zero parameter vector, one entry per column of X.
svm = LinearSVM_twoclass()
svm.theta = np.zeros(X.shape[1])

# Candidate grids; C and sigma are searched over the same values.
Cvals = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
sigma_vals = list(Cvals)

# Initial "best so far" bookkeeping.
best_C = 0.01
best_sigma = 0.01

best_acc = 0
for sigma in sigma_vals:
  print "Calculating K"
  sys.stdout.flush()
  K = np.array([utils.gaussian_kernel(x1,x2,sigma) for x1 in X for x2 in X]).reshape(X.shape[0],X.shape[0])
  scaler = preprocessing.StandardScaler().fit(K)