Example #1
def test_SVM_grad_vectorized_comparison_sparse(sample_train, check_count):
    Xtrain, ytrain = sample_train(count=500)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))

    mean_image = np.mean(Xtrain, axis=0)
    Xtrain -= mean_image

    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])

    W = np.random.randn(Xtrain.shape[1],10) * 0.0001
    loss, grad = svm_loss_vectorized(W, Xtrain, ytrain, 0.)

    f = lambda w: svm_loss_vectorized(w, Xtrain, ytrain, 0.0)[0]
    g = lambda w: svm_loss_vectorized(w, Xtrain, ytrain, 0.0)[1]
    # Centered-difference check: (f(W + h*e_ix) - f(W - h*e_ix)) / (2h) should approximate
    # the analytic gradient dL/dW[ix] returned by the vectorized implementation.

    num_checks = check_count
    grad_analytic = g(W)
    for i in range(num_checks):
        ix = tuple([random.randrange(m) for m in W.shape])
        shift = np.zeros(W.shape)
        shift[ix] = 1e-7
        grad_numerical = (f(W + shift) - f(W - shift)) / (2 * 1e-7)
        assert (abs(grad_numerical - grad_analytic[ix]) /
                (abs(grad_numerical) + abs(grad_analytic[ix])) < 0.0001)
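The test above inlines a sparse, centered-difference gradient check. For reference, here is a minimal standalone sketch of the same idea; the helper name, the tolerance, and the small epsilon guarding the denominator are illustrative choices, not part of the original test suite.

import random
import numpy as np

def sparse_grad_check(f, W, grad_analytic, num_checks=10, h=1e-7, tol=1e-4):
    # Compare the analytic gradient with a centered-difference estimate
    # at a handful of randomly chosen indices of W.
    for _ in range(num_checks):
        ix = tuple(random.randrange(m) for m in W.shape)
        shift = np.zeros_like(W)
        shift[ix] = h
        grad_numerical = (f(W + shift) - f(W - shift)) / (2 * h)
        denom = abs(grad_numerical) + abs(grad_analytic[ix]) + 1e-12
        rel_error = abs(grad_numerical - grad_analytic[ix]) / denom
        assert rel_error < tol, 'relative error %g at index %s' % (rel_error, ix)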
Example #3
def test_SVM_loss_vectorized_comparison_mean(sample_train, train_count, reg):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))

    mean_image = np.mean(Xtrain, axis=0)
    Xtrain_mean_removed = Xtrain - mean_image

    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])
    Xtrain_mean_removed = np.hstack([Xtrain_mean_removed, np.ones((Xtrain_mean_removed.shape[0], 1))])

    W = np.random.randn(Xtrain.shape[1],10) * 0.0001
    loss, grad = svm_loss_vectorized(W, Xtrain, ytrain, reg)
    loss_mean_removed, grad_mean_removed = svm_loss_vectorized(W, Xtrain_mean_removed, ytrain, reg)
    #assert np.abs(loss - loss_mean_removed) > 0.01
    assert np.linalg.norm(grad - grad_mean_removed) > 1.0
Example #4
def svm_test_vectorized(x, y):
    # Randomly generate a small SVM weight matrix: standard normal entries scaled by 0.0001
    W = np.random.randn(3073, 10) * 0.0001
    # Compute the SVM loss and the gradient w.r.t. the weights (no regularization term)
    loss, gradient = svm_loss_vectorized(W, x, y, 0.0)
    # Randomly sample a few dimensions of W and compare the numerical and analytic gradients
    # to verify correctness; the random sampling happens inside gradient_check.
    # Define a lambda expression that returns only the loss value.
    f = lambda w: svm_loss_vectorized(w, x, y, 0.0)[0]
    grad_check_sparse(f, W, gradient)
    print('turn on regularization')
    # Compute the SVM loss and the gradient w.r.t. the weights (with regularization)
    loss, gradient = svm_loss_vectorized(W, x, y, 5e1)
    # Randomly sample a few dimensions of W and compare the numerical and analytic gradients
    # to verify correctness; the random sampling happens inside gradient_check.
    # Define a lambda expression that returns only the loss value.
    f = lambda w: svm_loss_vectorized(w, x, y, 5e1)[0]
    grad_check_sparse(f, W, gradient)
Example #5
def test_SVM_loss_vectorized_no_bias_X(sample_train, sample_test):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest   = sample_test(count=20)

    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))

    W = np.random.randn(Xtrain.shape[1] + 1,10) * 0.0001
    with pytest.raises(ValueError):
        loss, grad = svm_loss_vectorized(W, Xtrain, ytrain, 1e2)
Example #8
def test_SVM_loss_naive_vectorized_comparison_reg(sample_train, train_count, reg):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])

    W = np.random.randn(Xtrain.shape[1],10) * 0.0001
    loss, grad = svm_loss_vectorized(W, Xtrain, ytrain, reg)
    loss_naive, grad_naive = svm_loss_naive(W, Xtrain, ytrain, reg)
    assert np.abs(loss - loss_naive) < 0.0001
    assert np.linalg.norm(grad - grad_naive) < 0.0001
Example #9
def test_SVM_loss_naive_vectorized_comparison(sample_train, train_count):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])

    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    loss, grad = svm_loss_vectorized(W, Xtrain, ytrain, 1e2)
    loss_naive, grad_naive = svm_loss_naive(W, Xtrain, ytrain, 1e2)
    assert np.abs(loss - loss_naive) < 0.0001
    assert np.linalg.norm(grad - grad_naive) < 0.0001
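Every test on this page calls svm_loss_vectorized(W, X, y, reg), which, as Examples #1 and #9 show, returns (loss, dW) for W of shape (D, C), X of shape (N, D) with the bias column folded in, and integer labels y of shape (N,). A minimal sketch of such a vectorized multiclass hinge loss is shown below; the margin delta = 1 and the regularization convention (reg * sum(W*W) in the loss, 2 * reg * W in the gradient) are assumptions that may differ from the implementation under test.

import numpy as np

def svm_loss_vectorized_sketch(W, X, y, reg, delta=1.0):
    # Scores for all samples and classes, plus the correct-class score per row.
    num_train = X.shape[0]
    scores = X.dot(W)                                      # (N, C)
    correct = scores[np.arange(num_train), y][:, None]     # (N, 1)

    # Hinge margins; the correct class never contributes to the loss.
    margins = np.maximum(0, scores - correct + delta)      # (N, C)
    margins[np.arange(num_train), y] = 0
    loss = margins.sum() / num_train + reg * np.sum(W * W)

    # Each positive margin adds +x_i to its class column and -x_i to the correct column.
    indicator = (margins > 0).astype(X.dtype)              # (N, C)
    indicator[np.arange(num_train), y] = -indicator.sum(axis=1)
    dW = X.T.dot(indicator) / num_train + 2 * reg * W
    return loss, dW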
Example #10
def test_SVM_loss_vectorized_no_bias_W(sample_train, sample_test):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest   = sample_test(count=20)

    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    #using the incorrect W size
    W = np.random.randn(Xtrain.shape[1],10) * 0.0001

    #add the bias dimension (to X)
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])

    with pytest.raises(ValueError):
        loss, grad = svm_loss_vectorized(W, Xtrain, ytrain, 1e2)
Example #12
def main():
    X_train, y_train, X_val, y_val, X_test, y_test = gen_train_val_test(49000, 1000, 1000)
    # generate a random SVM weight matrix of small numbers
    W = np.random.randn(10, 3073) * 0.01
    start = time.time()
    loss, grad = svm_loss_naive(W, X_train, y_train, 0.00001)
    end = time.time()
    print("svm_loss_naive: %f s" % (end - start))
    print('loss: %f' % (loss, ))
    start = time.time()
    loss1, grad = svm_loss_vectorized(W, X_train, y_train, 0.00001)
    end = time.time()
    print("svm_loss_vectorized: %f s" % (end - start))
    print('loss: %f' % (loss1, ))
    svm = LinearSVM()
    tic = time.time()
    loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=3e4,
                          num_iters=100000, batch_size=128, verbose=True)
    acc_train = evaluation(svm, X_train, y_train)
    acc_val = evaluation(svm, X_val, y_val)
    acc_test = evaluation(svm, X_test, y_test)
    print('Train acc :{} Validation :{} Test :{}'.format(acc_train, acc_val, acc_test))
    toc = time.time()
    print('That took %fs' % (toc - tic))
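gen_train_val_test and evaluation are helpers defined elsewhere in that project and not shown on this page. A plausible sketch of the accuracy helper, assuming the classifier exposes a predict method like the CS231n LinearSVM:

import numpy as np

def evaluation(classifier, X, y):
    # Fraction of samples whose predicted label matches the ground truth.
    y_pred = classifier.predict(X)
    return np.mean(y_pred == y)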
Example #13
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])
''' Evaluate naive implementation of loss '''
# Generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001  # 3073x10 (10 classes)
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0)

# # Numerically compute the gradient along several randomly chosen dimensions
# #   and compare with analytically computed gradient (grad)
# f = lambda w: svm_loss_naive(w, X_dev, y_dev, 0.0)[0] # Returns the loss
# grad_numerical = grad_check_sparse(f, W, grad)
# # Again with the regularization turned on
# loss, grad = svm_loss_naive(W, X_dev, y_dev, 5e1)
# f = lambda w: svm_loss_naive(w, X_dev, y_dev, 5e1)[0] # Returns the loss
# grad_numerical = grad_check_sparse(f, W, grad)
''' Evaluate vectorized implementation of loss '''
loss_v, grad_v = svm_loss_vectorized(W, X_dev, y_dev, 0)
print("Gradient difference", np.linalg.norm(grad - grad_v))
print("Loss difference", loss - loss_v)
''' Implement Stochastic Gradient Descent to minimize loss '''
svm = LinearSVM()
tic = time.time()
# Get list of loss history over training and visualize
loss_hist = svm.train(X_train,
                      y_train,
                      learning_rate=1e-7,
                      reg=2.5e4,
                      num_iters=1500,
                      verbose=True)
toc = time.time()
print("Time", (toc - tic))
plt.plot(loss_hist)
Example #14
File: test.py Project: Tang7/cnn231
print(correct[:, :2])

diff -= correct

# print diff

dW = diff.dot(X.T)/float(num_train)

# print dW

loss1, grad = svm_loss_naive(W, X, y, 1)

# print grad


loss2, grad2 = svm_loss_vectorized(W, X, y, 1)

# print grad2
print(loss)
print(loss1)
print(loss2)

differ = np.linalg.norm(grad - grad2, ord='fro')
print('difference: %f' % differ)

differ1 = np.linalg.norm(dW - grad, ord='fro')
print('difference: %f' % differ1)

differ2 = np.linalg.norm(dW - grad2, ord='fro')
print('difference: %f' % differ2)
Example #15
# generate a random SVM weight matrix of small numbers
W = np.random.randn(196608, 5) * 0.0001

h5f = h5py.File('img_data.h5', 'r')
X = h5f['dataset_1'][:]
h5f.close()
y = loadtxt("y_labels.txt", dtype=np.uint8, delimiter="\n", unpack=False)

#X_train = np.zeros((27116,196608))
#y_train = np.zeros(27116)
#X_val = np.zeros((5000,196608))
#y_val = np.zeros(5000)

X_train = X[8000:35117, :]
y_train = y[8000:35117]
X_val = X[3000:8000, :]
y_val = y[3000:8000]
loss_vectorized, grad_vectorized = svm_loss_vectorized(W, X_train, y_train,
                                                       0.00001)
svm = LinearSVM()
loss_hist = svm.train(X_train,
                      y_train,
                      learning_rate=1e-7,
                      reg=5e4,
                      num_iters=1500,
                      verbose=True)
y_train_pred = svm.predict(X_train)
print('training accuracy: %f' % (np.mean(y_train == y_train_pred), ))
y_val_pred = svm.predict(X_val)
print('validation accuracy: %f' % (np.mean(y_val == y_val_pred), ))
Example #16
    def loss(self, X_batch, y_batch, reg):
        return svm_loss_vectorized(self.W, X_batch, y_batch, reg)
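This one-line loss method is the hook a LinearClassifier-style trainer calls from its SGD loop. A minimal sketch of how it fits together, mirroring the CS231n LinearSVM interface (the initialization scale, batching, and update rule here are illustrative choices, not the course-provided implementation):

import numpy as np
from cs231n.classifiers.linear_svm import svm_loss_vectorized

class LinearSVMSketch:
    def __init__(self):
        self.W = None

    def loss(self, X_batch, y_batch, reg):
        return svm_loss_vectorized(self.W, X_batch, y_batch, reg)

    def train(self, X, y, learning_rate=1e-7, reg=2.5e4,
              num_iters=1500, batch_size=200, verbose=False):
        num_train, dim = X.shape
        num_classes = int(np.max(y)) + 1
        if self.W is None:
            self.W = 0.001 * np.random.randn(dim, num_classes)
        loss_history = []
        for it in range(num_iters):
            # Sample a minibatch and take one vanilla SGD step on the hinge loss.
            idx = np.random.choice(num_train, batch_size, replace=True)
            loss, grad = self.loss(X[idx], y[idx], reg)
            loss_history.append(loss)
            self.W -= learning_rate * grad
            if verbose and it % 100 == 0:
                print('iteration %d / %d: loss %f' % (it, num_iters, loss))
        return loss_history

    def predict(self, X):
        return np.argmax(X.dot(self.W), axis=1)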
Example #17
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_val = np.reshape(X_val, (X_val.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))

mean_image = np.mean(X_train, axis=0)
# second: subtract the mean image from train and test data
X_train -= mean_image
X_val -= mean_image
X_test -= mean_image
X_dev -= mean_image

# third: append the bias dimension of ones (i.e. bias trick) so that our SVM
# only has to worry about optimizing a single weight matrix W.
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

W = np.random.randn(3073, 10) * 0.0001

# gradient check against the vectorized loss (svm_loss_naive could be used here as well)
loss, grad = svm_loss_vectorized(W, X_dev, y_dev, 0.000005)
f = lambda w: svm_loss_vectorized(w, X_dev, y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad)
print(loss)

loss, grad = svm_loss_vectorized(W, X_dev, y_dev, 5e1)
f = lambda w: svm_loss_vectorized(w, X_dev, y_dev, 5e1)[0]
grad_numerical = grad_check_sparse(f, W, grad)
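Several of these snippets append a column of ones to X (the bias trick) so that the per-class bias becomes the last row of W and the classifier reduces to a single matrix product. A small self-contained check of that equivalence (all names here are illustrative):

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((5, 3))                     # 5 samples, 3 features
W = rng.standard_normal((3, 10))                    # weights for 10 classes
b = rng.standard_normal((1, 10))                    # per-class bias

X_ext = np.hstack([X, np.ones((X.shape[0], 1))])    # (5, 4): bias trick on the data
W_ext = np.vstack([W, b])                           # (4, 10): bias folded into the weights

assert np.allclose(X_ext.dot(W_ext), X.dot(W) + b)  # [X, 1] @ [[W], [b]] == X @ W + b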
Example #19
# do the gradient check once again with regularization turned on
# you didn't forget the regularization gradient did you?
loss, grad = svm_loss_naive(W, X_dev, y_dev, 1e2)
f = lambda w: svm_loss_naive(w, X_dev, y_dev, 1e2)[0]
grad_numerical = grad_check_sparse(f, W, grad)

# Next implement the function svm_loss_vectorized; for now only compute the loss;
# we will implement the gradient in a moment.
tic = time.time()
loss_naive, grad_naive = svm_loss_naive(W, X_dev, y_dev, 0.00001)
toc = time.time()
print('Naive loss: %e computed in %fs' % (loss_naive, toc - tic))

from cs231n.classifiers.linear_svm import svm_loss_vectorized
tic = time.time()
loss_vectorized, _ = svm_loss_vectorized(W, X_dev, y_dev, 0.00001)
toc = time.time()
print('Vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

# The losses should match but your vectorized implementation should be much faster.
print('difference: %f' % (loss_naive - loss_vectorized))

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train,
                      y_train,
                      learning_rate=1e-7,
                      reg=5e4,
Example #20
print(mean_image[:10])  # print a few of the elements
plt.figure(figsize=(4, 4))
plt.imshow(mean_image.reshape((32, 32, 3)).astype(
    'uint8'))  # visualize the mean image
plt.show()

# second: subtract the mean image from train and test data
X_train -= mean_image
X_val -= mean_image
X_test -= mean_image
X_dev -= mean_image

# third: append the bias dimension of ones (i.e. bias trick) so that our SVM
# only has to worry about optimizing a single weight matrix W.
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)

# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive, svm_loss_vectorized
import time

# generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001

loss, grad = svm_loss_vectorized(W, X_dev, y_dev, 0.000005)
print('loss: %f' % (loss, ))
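A quick sanity check on the value printed above: with W scaled by 1e-4 all scores are close to zero, so each of the 9 incorrect classes contributes a margin of roughly max(0, 0 - 0 + 1) = 1, and the reg = 5e-6 term is negligible, so the printed loss should land near C - 1 = 9. A one-line check of that arithmetic:

num_classes, delta = 10, 1.0
print('expected loss with near-zero scores ~', (num_classes - 1) * delta)   # ~9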
Example #21
def old_vect_grad_desc():

    ## Old version of gradient descent

    from cs231n.classifiers.linear_svm import svm_loss_naive
    from cs231n.classifiers.linear_svm import svm_loss_vectorized

    loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.5, dograd=False)
    loss2, grad = svm_loss_vectorized(W, X_dev, y_dev, 0.5)

    print "loss: ", loss
    print "loss2: ", loss2

    # data
    #reg = 10000
    X = X_dev
    y = y_dev
    delta = 1
    loss_sum = 0.0
    loss = 0
    reg = 0.5
    step_size = 1e-5
    dW = np.zeros(W.shape)  # initialize the gradient as zero
    dW2 = np.zeros(W.shape)  # initialize the gradient as zero

    scores = X.dot(W)

    # calculating  'correct_class_scores':
    unit_mat = np.ones([X.shape[0], W.shape[1]])

    c = range(0, X.shape[0])
    correct_class_score = np.identity(X.shape[0])
    correct_class_score[c, c] = scores[c, y[c]]

    correct_class_score = np.dot(correct_class_score, unit_mat)
    correct_class_score[c, y[c]] = 0

    loss = scores - correct_class_score + unit_mat * delta

    print(loss.shape)
    loss_uni = np.ones(loss.shape)
    loss_uni[loss < 0] = 0
    print(np.sum(loss_uni, axis=1))
    print(loss_uni)

    ## grad for y_i != j
    dW2 = np.dot(np.transpose(X), loss_uni)
    print("dw2:", dW2.shape)

    gr_minus = np.sum(loss > 0, axis=1)
    print("gr_minus", gr_minus)

    zero_mat = np.zeros([X.shape[0], W.shape[1]])
    zero_mat[c, y] = 1

    gr_minus = np.dot(np.identity(gr_minus.shape[0]) * gr_minus, zero_mat)
    print(gr_minus.shape)
    print(gr_minus[0:20, 0:20])

    print("x shape: ", X.shape)

    dW2 -= np.dot(np.transpose(X), gr_minus)
    dW2 /= X.shape[0]
    dW2 += 0.5 * reg * 2 * W

    print "loss matr dimensions: ", loss.shape
    print X.shape
    k = 1

    for k in xrange(X.shape[1]):
        for l in xrange(W.shape[1]):

            X_h = np.zeros([X.shape[0], W.shape[1]])
            X_h[:, l] = X[:, k] * step_size

            c = range(0, X.shape[0])
            unit_mat = np.ones([X.shape[0], W.shape[1]])
            correct_class_score = np.identity(X.shape[0])
            correct_class_score[c, c] = X_h[c, y[c]]
            correct_class_score = np.dot(correct_class_score, unit_mat)
            correct_class_score[c, y[c]] = 0

            loss_h = loss + X_h - correct_class_score
            loss_h[loss_h < 0] = 0
            loss_h[c, y[c]] = 0

            loss_ph_sum = (np.sum(loss_h) / X.shape[0])

            loss_h = loss - X_h + correct_class_score
            loss_h[loss_h < 0] = 0
            loss_h[c, y[c]] = 0
            loss_mh_sum = (np.sum(loss_h) / X.shape[0])

            dW[k, l] = loss_ph_sum - loss_mh_sum

    dW = (dW + reg * 2 * W * step_size + step_size**2) / (2 * step_size)

    print(dW.shape)
    print(dW[0, 0:10])

    print("dW2")
    print(dW2.shape)
    print(dW2[0, 0:10])

    loss[loss < 0] = 0
    loss[c, y[c]] = 0

    difference = np.linalg.norm(dW - dW2, ord='fro')
    print("diff ", difference)

    loss_sum = (np.sum(loss) / X.shape[0]) + 0.5 * reg * np.sum(W * W)

    print(loss_sum)
Example #22
# Numerically compute the gradient along several randomly chosen dimensions, and
# compare them with your analytically computed gradient. The numbers should match
# almost exactly along all dimensions.
from cs231n.gradient_check import grad_check_sparse

f = lambda w: svm_loss_naive(w, X_dev, Y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad)

# do the gradient check once again with regularization turned on
# you didn't forget the regularization gradient did you?
loss, grad = svm_loss_naive(W, X_dev, Y_dev, 5e1)
f = lambda w: svm_loss_naive(w, X_dev, Y_dev, 5e1)[0]
grad_numerical = grad_check_sparse(f, W, grad)

# check the gradient of the vectorized svm (recompute grad so it matches the reg used in f)
loss, grad = svm_loss_vectorized(W, X_dev, Y_dev, 0.0)
f = lambda w: svm_loss_vectorized(w, X_dev, Y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad)

# do the gradient check once again with regularization turned on
# you didn't forget the regularization gradient did you?
loss, grad = svm_loss_vectorized(W, X_dev, Y_dev, 5e1)
f = lambda w: svm_loss_vectorized(w, X_dev, Y_dev, 5e1)[0]
grad_numerical = grad_check_sparse(f, W, grad)

# Next implement the function svm_loss_vectorized; for now only compute the loss;
# we will implement the gradient in a moment.
tic = time.time()
loss_naive, grad_naive = svm_loss_naive(W, X_dev, Y_dev, 0.000005)
toc = time.time()
print('Naive loss: %e computed in %fs' % (loss_naive, toc - tic))
Example #23
# do the gradient check once again with regularization turned on
# you didn't forget the regularization gradient did you?
loss, grad = svm_loss_naive_test(W, X_dev, y_dev, 1e2)
f = lambda w: svm_loss_naive_test(w, X_dev, y_dev, 1e2)[0]
grad_numerical = grad_check_sparse(f, W, grad)

# Next implement the function svm_loss_vectorized; for now only compute the loss;
# we will implement the gradient in a moment.
tic = time.time()
loss_naive, grad_naive = svm_loss_naive_test(W, X_dev, y_dev, 0.00001)
toc = time.time()
print('Naive loss: %e computed in %fs' % (loss_naive, toc - tic))

tic = time.time()
loss_vectorized, grad_vectorized = svm_loss_vectorized(W, X_dev, y_dev, 0.00001)
toc = time.time()
print('Vectorized loss and gradient: computed in %fs' % (toc - tic))
pdb.set_trace()

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
#svm = LinearSVM()
#tic = time.time()
#loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=5e4,
#                      num_iters=1500, verbose=True)
#toc = time.time()
#print 'That took %fs' % (toc - tic)
#pdb.set_trace()
# *Hint: the SVM loss function is not strictly speaking differentiable*
#
# **Your Answer:** *fill this in.*

# In[ ]:

# Next implement the function svm_loss_vectorized; for now only compute the loss;
# we will implement the gradient in a moment.
tic = time.time()
loss_naive, grad_naive = svm_loss_naive(W, X_train, y_train, 0.00001)
toc = time.time()
print('Naive loss: %e computed in %fs' % (loss_naive, toc - tic))

from cs231n.classifiers.linear_svm import svm_loss_vectorized
tic = time.time()
loss_vectorized, _ = svm_loss_vectorized(W, X_train, y_train, 0.00001)
toc = time.time()
print('Vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

# The losses should match but your vectorized implementation should be much faster.
print('difference: %f' % (loss_naive - loss_vectorized))


# In[ ]:

# Complete the implementation of svm_loss_vectorized, and compute the gradient
# of the loss function in a vectorized way.

# The naive implementation and the vectorized implementation should match, but
# the vectorized version should still be much faster.
tic = time.time()
Example #25
def test2():
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    num_training = 49000
    num_validation = 1000
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]

    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]

    num_test = 1000
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # print 'Train data shape: ', X_train.shape
    # print 'Train labels shape: ', y_train.shape
    # print 'Validation data shape: ', X_val.shape
    # print 'Validation labels shape: ', y_val.shape
    # print 'Test data shape: ', X_test.shape
    # print 'Test labels shape: ', y_test.shape

    # Preprocessing: reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))

    # As a sanity check, print out the shapes of the data
    # print 'Training data shape: ', X_train.shape
    # print 'Validation data shape: ', X_val.shape
    # print 'Test data shape: ', X_test.shape

    mean_image = np.mean(X_train, axis=0)
    #print mean_image[:10]
    #plt.figure(figsize=(4,4))
    #plt.imshow(mean_image.reshape((32,32,3)).astype('uint8'))
    #plt.savefig('./figures/svm_mean.png')
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]).T
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]).T
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]).T

    svm_sgd(X_train, y_train, X_val, y_val, X_test, y_test)
    #softmax_sgd(X_train,y_train, X_val, y_val,X_test,y_test)
    return
    W = np.random.randn(10, 3073) * 0.0001
    # loss, grad = svm_loss_naive(W,X_train, y_train,0.00001)
    # print 'loss: %f' %(loss,)

    #loss, grad = svm_loss_naive(W,X_train,y_train,0.0)
    #loss, grad = svm_loss_vectorized(W,X_train,y_train,0.0)

    #f = lambda w: svm_loss_naive(w,X_train, y_train,0.0)[0]
    #grad_check_sparse(f, W, grad, 10)

    tic = time.time()
    loss_naive, grad_naive = svm_loss_naive(W, X_train, y_train, 0.00001)
    toc = time.time()
    print('Naive loss: %e, computed in %fs' % (loss_naive, toc - tic))

    tic = time.time()
    loss_vectorized, grad_vector = svm_loss_vectorized(W, X_train, y_train,
                                                       0.00001)
    toc = time.time()
    print('Vectorized loss: %e, computed in %fs' % (loss_vectorized, toc - tic))

    # The losses should match but your vectorized implementation should be much faster.
    print('difference: %f' % (loss_naive - loss_vectorized))
    difference = np.linalg.norm(grad_naive - grad_vector, ord='fro')
    print('difference of grad: %f' % difference)
Example #26
    def loss(self, X_batch, y_batch, reg):
        return linear_svm.svm_loss_vectorized(self.W, X_batch, y_batch, reg)
Example #27
# print(np.mean(xTrain,axis = 0).shape)
# print(np.std(xTrain,axis = 0).shape)

xTrain = (xTrain - np.mean(xTrain, axis=0)) / np.std(xTrain, axis=0)
# print(np.mean(xTrain,axis = 0))
# print(np.std(xTrain,axis = 0))

W = 2 * np.random.random_sample((xTrain.shape[1], 10)) - 1
reg = 10

start = time.time()
loss, dw = linear_svm.svm_loss_naive(W, xTrain, yTrain, 0)
print("Time for Naive ", time.time() - start)

start = time.time()
lossVector, dwVector = linear_svm.svm_loss_vectorized(W, xTrain, yTrain, 0)
print("Time for Vectorised approach ", time.time() - start)

# def f(w):
# return linear_svm.svm_loss_naive(w,xTrain,yTrain,0)[0]
# gradient_check.grad_check_sparse(f, W, dw, num_checks=10, h=1e-5)

cvFold = 5
learningRates = [1e-3]
regStrengths = [0, 100, 200, 400, 500]

xTrainCV = np.array(np.split(xTrain, cvFold))
yTrainCV = np.array(np.split(yTrain, cvFold))

accuracy = -1
alphaBest = -1
Example #28
# It is possible that once in a while a dimension in the gradcheck will not match exactly. What could such a discrepancy be caused by? Is it a reason for concern? What is a simple example in one dimension where a gradient check could fail? How would change the margin affect of the frequency of this happening? *Hint: the SVM loss function is not strictly speaking differentiable*
#
# **Your Answer:** *fill this in.*

# In[33]:

# Next implement the function svm_loss_vectorized; for now only compute the loss;
# we will implement the gradient in a moment.
tic = time.time()
loss_naive, grad_naive = svm_loss_naive(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('Naive loss: %e computed in %fs' % (loss_naive, toc - tic))

from cs231n.classifiers.linear_svm import svm_loss_vectorized
tic = time.time()
loss_vectorized, _ = svm_loss_vectorized(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('Vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

# The losses should match but your vectorized implementation should be much faster.
print('difference: %f' % (loss_naive - loss_vectorized))

# In[34]:

# Complete the implementation of svm_loss_vectorized, and compute the gradient
# of the loss function in a vectorized way.

# The naive implementation and the vectorized implementation should match, but
# the vectorized version should still be much faster.
tic = time.time()
_, grad_naive = svm_loss_naive(W, X_dev, y_dev, 0.000005)
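The question in the comment at the top of this example has a standard one-dimensional illustration: the hinge max(0, x) has a kink at 0, so a centered-difference estimate that straddles the kink disagrees with the analytic (sub)gradient. A tiny sketch of that failure mode (purely illustrative, not part of the assignment code):

f = lambda x: max(0.0, x)      # 1-D hinge with a kink at x = 0

x, h = 1e-8, 1e-5              # evaluation point closer to the kink than the step size
analytic = 1.0 if x > 0 else 0.0                 # subgradient the code would report
numerical = (f(x + h) - f(x - h)) / (2 * h)      # ~0.5, because the interval straddles 0
print(analytic, numerical)                       # 1.0 vs ~0.5: the check fails at the kink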
Example #29
# It is possible that once in a while a dimension in the gradcheck will not match exactly. What could such a discrepancy be caused by? Is it a reason for concern? What is a simple example in one dimension where a gradient check could fail? *Hint: the SVM loss function is not strictly speaking differentiable*
# 
# **Your Answer:** *fill this in.*

# In[ ]:

# Next implement the function svm_loss_vectorized; for now only compute the loss;
# we will implement the gradient in a moment.
tic = time.time()
loss_naive, grad_naive = svm_loss_naive(W, X_dev, y_dev, 0.00001)
toc = time.time()
print('Naive loss: %e computed in %fs' % (loss_naive, toc - tic))

from cs231n.classifiers.linear_svm import svm_loss_vectorized
tic = time.time()
loss_vectorized, _ = svm_loss_vectorized(W, X_dev, y_dev, 0.00001)
toc = time.time()
print('Vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

# The losses should match but your vectorized implementation should be much faster.
print('difference: %f' % (loss_naive - loss_vectorized))


# In[ ]:

# Complete the implementation of svm_loss_vectorized, and compute the gradient
# of the loss function in a vectorized way.

# The naive implementation and the vectorized implementation should match, but
# the vectorized version should still be much faster.
tic = time.time()