import torch

import hw3_utils


def svm_predictor(alpha, x_train, y_train, x_test,
                  kernel=hw3_utils.poly(degree=1)):
    """An SVM predictor.

    Arguments:
        alpha: a 1d tensor with shape (n,), denoting an optimal dual solution.
        x_train: a 2d tensor with shape (n, d), denoting the training set.
        y_train: a 1d tensor with shape (n,), whose elements are +1 or -1.
        x_test: a 2d tensor with shape (m, d), denoting the test set.
        kernel: the kernel function.
            The default kernel function is 1 + <x, y>.

    Return:
        A 1d tensor with shape (m,), the outputs of SVM on the test set.
    """
    # Do I need to cut off very small alpha to 0?
    N = x_train.size(0)
    M = x_test.size(0)
    # Q[i, j] = y_i * k(x_i, t_j), so (alpha @ Q)[j] = sum_i alpha_i y_i k(x_i, t_j).
    Q = torch.empty((N, M))
    for i in range(N):
        for j in range(M):
            Q[i, j] = kernel(x_test[j], x_train[i]) * y_train[i]
    return alpha @ Q
# Alternative implementation of svm_predictor; if both definitions are kept in
# one module, this one shadows the one above.
def svm_predictor(alpha, x_train, y_train, x_test,
                  kernel=hw3_utils.poly(degree=1)):
    """An SVM predictor.

    Arguments:
        alpha: a 1d tensor with shape (n,), denoting an optimal dual solution.
        x_train: a 2d tensor with shape (n, d), denoting the training set.
        y_train: a 1d tensor with shape (n,), whose elements are +1 or -1.
        x_test: a 2d tensor with shape (m, d), denoting the test set.
        kernel: the kernel function.
            The default kernel function is 1 + <x, y>.

    Return:
        A 1d tensor with shape (m,), the outputs of SVM on the test set.
    """
    y_hat = torch.zeros(x_test.shape[0])
    for j in range(x_test.shape[0]):
        # f(t_j) = sum_i alpha_i y_i k(x_i, t_j)
        y_hat[j] = sum(
            alpha[i] * y_train[i] * kernel(x_train[i], x_test[j])
            for i in range(x_train.shape[0])
        )
    return y_hat
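# A vectorized sketch of the same prediction, assuming the polynomial kernel
# form (1 + <x, y>)**degree stated in the docstring. This helper is
# hypothetical (not part of the assignment API); it just shows that building
# the full train/test Gram matrix once avoids the Python double loop.
def _poly_svm_predictor_vectorized(alpha, x_train, y_train, x_test, degree=1):
    # K[i, j] = (1 + <x_i, t_j>)**degree, shape (n, m).
    K = (1.0 + x_train @ x_test.T) ** degree
    # Decision values f(t_j) = sum_i alpha_i y_i K[i, j].
    return (alpha * y_train) @ K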
def svm_solver(x_train, y_train, lr, num_iters,
               kernel=hw3_utils.poly(degree=1), c=None):
    """An SVM solver.

    Arguments:
        x_train: a 2d tensor with shape (n, d).
        y_train: a 1d tensor with shape (n,), whose elements are +1 or -1.
        lr: the learning rate.
        num_iters: the number of gradient descent steps.
        kernel: the kernel function.
            The default kernel function is 1 + <x, y>.
        c: the trade-off parameter in soft-margin SVM.
            The default value is None, referring to the basic, hard-margin SVM.

    Return:
        alpha: a 1d tensor with shape (n,), denoting an optimal dual solution.
            Initialize alpha to be 0.
            Return alpha.detach() could possibly help you save some time
            when you try to use alpha in other places.

    Note that if you use something like alpha = alpha.clamp(...) with
    torch.no_grad(), you will have alpha.requires_grad=False after this step.
    You will then need to use alpha.requires_grad_().
    Alternatively, use in-place operations such as clamp_().
    """
    N = x_train.size(0)
    # alpha is 1d with shape (n,), matching the docstring.
    alpha = torch.zeros(N, requires_grad=True)

    # Q[i, j] = y_i y_j k(x_i, x_j): the matrix of the dual objective.
    Q = torch.empty((N, N))
    for i in range(N):
        for j in range(N):
            Q[i, j] = kernel(x_train[i], x_train[j]) * y_train[i] * y_train[j]

    def loss_func():
        # Dual objective: 0.5 * alpha^T Q alpha - sum(alpha).
        return 0.5 * alpha @ Q @ alpha - alpha.sum()

    sgd = torch.optim.SGD([alpha], lr=lr)
    for _ in range(num_iters):
        loss = loss_func()
        loss.backward()
        sgd.step()
        sgd.zero_grad()
        # Project back onto the feasible set 0 <= alpha <= c
        # (c=None leaves alpha unbounded above, i.e. hard margin).
        with torch.no_grad():
            alpha.clamp_(min=0, max=c)
    return alpha.detach()
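# A minimal usage sketch on hypothetical toy data (not part of the
# assignment): fit the dual on a linearly separable two-point set and check
# that svm_predictor, with the same kernel, recovers the labels' signs.
def _toy_check():
    x = torch.tensor([[1.0, 1.0], [-1.0, -1.0]])
    y = torch.tensor([1.0, -1.0])
    lin = lambda u, v: 1.0 + u @ v  # same form as the default poly(degree=1)
    a = svm_solver(x, y, lr=0.1, num_iters=1000, kernel=lin)
    scores = svm_predictor(a, x, y, x, kernel=lin)
    assert torch.all(torch.sign(scores) == y)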
# Alternative svm_solver using hand-written projected gradient descent; if
# both definitions are kept in one module, this one shadows the one above.
def svm_solver(x_train, y_train, lr, num_iters,
               kernel=hw3_utils.poly(degree=1), c=None):
    """An SVM solver.

    Arguments:
        x_train: a 2d tensor with shape (n, d).
        y_train: a 1d tensor with shape (n,), whose elements are +1 or -1.
        lr: the learning rate.
        num_iters: the number of gradient descent steps.
        kernel: the kernel function.
            The default kernel function is 1 + <x, y>.
        c: the trade-off parameter in soft-margin SVM.
            The default value is None, referring to the basic, hard-margin SVM.

    Return:
        alpha: a 1d tensor with shape (n,), denoting an optimal dual solution.
            Initialize alpha to be 0.
            Return alpha.detach() could possibly help you save some time
            when you try to use alpha in other places.

    Note that if you use something like alpha = alpha.clamp(...) with
    torch.no_grad(), you will have alpha.requires_grad=False after this step.
    You will then need to use alpha.requires_grad_().
    Alternatively, use in-place operations such as clamp_().
    """
    n = x_train.shape[0]
    a = torch.zeros(n)
    # Gram matrix K[i, j] = k(x_i, x_j); it does not change across iterations,
    # so build it once outside the loop.
    K = torch.tensor([
        [float(kernel(x_train[i], x)) for x in x_train]
        for i in range(n)
    ])
    for _ in range(num_iters):
        # Gradient of the dual objective 0.5 * a^T Q a - sum(a),
        # with Q[i, j] = y_i y_j K[i, j]:
        #   grad_i = y_i * sum_j a_j y_j K[i, j] - 1.
        grad = torch.stack([
            y_train[i] * torch.sum(a * y_train * K[i]) - 1.0
            for i in range(n)
        ])
        # Gradient step followed by projection onto 0 <= a <= c
        # (c=None leaves a unbounded above, i.e. hard margin).
        a = torch.clamp(a - lr * grad, 0.0, c)
    return a
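# A fully vectorized sketch of the same projected gradient descent, assuming
# the polynomial kernel form (1 + <x, y>)**degree stated in the docstring.
# This helper is hypothetical (not part of the assignment API); it avoids
# per-element kernel calls entirely.
def _poly_svm_solver_vectorized(x_train, y_train, lr, num_iters, degree=1, c=None):
    n = x_train.shape[0]
    # Q[i, j] = y_i y_j (1 + <x_i, x_j>)**degree.
    Q = (y_train[:, None] * y_train[None, :]) * (1.0 + x_train @ x_train.T) ** degree
    a = torch.zeros(n)
    for _ in range(num_iters):
        grad = Q @ a - 1.0                       # gradient of 0.5 a^T Q a - sum(a)
        a = torch.clamp(a - lr * grad, 0.0, c)   # project onto 0 <= a <= c
    return a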
# loss_func = nn.CrossEntropyLoss()
# sgd = torch.optim.SGD(net.parameters(), lr=0.005)
# train, val = hw3_utils.torch_digits()
# train_epoch_loss, validation_epoch_loss = fit_and_validate(
#     net, sgd, loss_func, train, val, n_epochs=30, batch_size=16)
# plt.plot(range(len(train_epoch_loss)), train_epoch_loss, label='train_loss')
# plt.plot(range(len(validation_epoch_loss)), validation_epoch_loss, label='val_loss')
# plt.legend()
# plt.show()

# P6
x_train, y_train = xor_data()

# Polynomial kernel, degree 2.
# alpha = svm_solver(x_train, y_train, lr=0.1, num_iters=10000,
#                    kernel=hw3_utils.poly(degree=2), c=None)
# svm_contour(alpha, x_train, y_train, kernel=hw3_utils.poly(degree=2),
#             xmin=-5, xmax=5, ymin=-5, ymax=5, ngrid=33)

# RBF kernels with sigma = 1, 2, 4: use the same kernel for solving and for
# plotting the decision contour.
for sigma in (1, 2, 4):
    alpha = svm_solver(x_train, y_train, lr=0.1, num_iters=10000,
                       kernel=rbf(sigma=sigma), c=None)
    svm_contour(alpha, x_train, y_train, kernel=rbf(sigma=sigma),
                xmin=-5, xmax=5, ymin=-5, ymax=5, ngrid=33)
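# A quick sanity check (a sketch, assuming xor_data and rbf behave as used
# above): the last hard-margin RBF solution should classify the XOR training
# points themselves correctly.
preds = svm_predictor(alpha, x_train, y_train, x_train, kernel=rbf(sigma=sigma))
print('training accuracy:', (torch.sign(preds) == y_train).float().mean().item())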