Example #1
def loss(W, b, X, Y):
    """ Compute cross-entropy loss, plus an L1 or L2 penalty on the weights """
    total = 0.
    forward_prop = forward(W, b, X)
    # Cross-entropy term: -sum over examples and classes of Y * log(p)
    for n in range(X.shape[0]):
        for c in range(NUM_CLASSES):
            total += Y[n, c] * np.log(forward_prop[n, c])
    total = -1. * total
    # Regularization term: (ALPHA / 2) * W^2 for L2, ALPHA * |W| for L1
    for d in range(INPUT_DIM):
        for c in range(NUM_CLASSES):
            total += (ALPHA / 2.) * np.square(W[c, d]) if L2_REGULARIZATION else ALPHA * np.abs(W[c, d])
    return total
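These snippets all call forward(), which is not included in the excerpts. A minimal sketch, assuming it returns the row-wise softmax of np.dot(X, W.T) + b, which is how loss() and gradients() consume it:

import numpy as np

def forward(W, b, X):
    """ Softmax probabilities, shape (num examples, NUM_CLASSES) """
    logits = np.dot(X, W.T) + b
    logits -= np.max(logits, axis=1, keepdims=True)  # stabilize the exponentials
    exp_logits = np.exp(logits)
    return exp_logits / np.sum(exp_logits, axis=1, keepdims=True)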
Example #2
def gradients(W, b, X, Y):
    """
    W: shape (10, 784)
    b: shape (10,)
    X: shape (num examples, 784)
    Y: shape (num examples, 10)
    """
    forward_prop = forward(W, b, X)
    # Unnormalized scores, and the softmax-weighted average of the weight
    # rows for each example (the cross terms of the softmax Jacobian)
    other = np.exp(np.dot(X, W.T) + b)
    avg_W = np.dot(other, W) / np.sum(other, axis=1, keepdims=True)
    dW, db, dX = np.zeros((NUM_CLASSES, INPUT_DIM)), np.zeros(NUM_CLASSES), np.zeros((BATCH_SIZE, INPUT_DIM))
    for n in range(BATCH_SIZE):
        for c in range(NUM_CLASSES):
            for d in range(INPUT_DIM):
                # Softmax cross-entropy gradient w.r.t. the weights
                dW[c, d] += (forward_prop[n, c] - Y[n, c]) * X[n, d]
                # Gradient w.r.t. the input, used to craft adversarial examples
                dX[n, d] += -1. * Y[n, c] * (W[c, d] - avg_W[n, d])
            # Softmax cross-entropy gradient w.r.t. the bias
            db[c] += forward_prop[n, c] - Y[n, c]
    # The regularizer depends only on W, so its gradient belongs in dW, not
    # dX; the derivative of (ALPHA / 2.) * W^2 is ALPHA * W
    dW += ALPHA * W if L2_REGULARIZATION else ALPHA * np.sign(W)
    return {"dW": dW, "db": db, "dX": dX}
Example #3
    train_X, train_Y, test_X, test_Y = load_data()

    # Sample a random batch from the *test data*
    X, Y = get_batch(test_X, test_Y)

    # Perform adversarial attacks; for each one, also track the classifier's
    # accuracy under that attack so the attacks can be compared afterwards

    # First compute gradients
    grad = gradients(W, b, X, Y)

    Y = np.argmax(Y, axis=1)

    # 0. original example (not an attack!)
    Y_hat_original = np.argmax(forward(W, b, X), axis=1)
    score = evaluate(Y, Y_hat_original)
    print("[original]\tAccuracy {}%".format(score))
    print(Y_hat_original)

    # 1. fast-gradient sign method (FGSM)
    X_fgsm = fgsm(X, grad["dX"], 2 * EPSILON)
    Y_hat_fgsm = np.argmax(forward(W, b, X_fgsm), axis=1)
    score = evaluate(Y, Y_hat_fgsm)
    print("[  FGSM]\tAccuracy {}%".format(score))
    print(Y_hat_fgsm)

    # 2. targeted fast-gradient sign method (T-FGSM)
    # The targeted variant needs gradients computed against the false labels,
    # not the true ones, so Y_false must actually feed into the attack
    # (np.eye(...)[Y_false] assumes generate_false_labels returns class indices)
    Y_false = generate_false_labels(Y)
    grad_false = gradients(W, b, X, np.eye(NUM_CLASSES)[Y_false])
    X_tfgsm = targeted_fgsm(X, grad_false["dX"], 2 * EPSILON)
    Y_hat_tfgsm = np.argmax(forward(W, b, X_tfgsm), axis=1)
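fgsm() and targeted_fgsm() are defined elsewhere in the project. A plausible sketch of both, assuming pixel values scaled to [0, 1]; the clipping range and function bodies are assumptions, not the original implementation:

def fgsm(X, dX, epsilon):
    """ Untargeted FGSM: step in the direction that increases the loss """
    return np.clip(X + epsilon * np.sign(dX), 0., 1.)  # assumes pixels in [0, 1]

def targeted_fgsm(X, dX, epsilon):
    """ Targeted FGSM: step against the gradient of the loss computed
        on the false (target) labels, pushing predictions toward them """
    return np.clip(X - epsilon * np.sign(dX), 0., 1.)  # assumes pixels in [0, 1]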
Example #4
        grad = gradients(W, b, X, Y)
        W = W - LEARNING_RATE * grad["dW"]
        b = b - LEARNING_RATE * grad["db"]

        # Save updated parameters
        if not os.path.isdir("./params/"):
            os.mkdir("./params/")

        if L2_REGULARIZATION:
            np.save("./params/W_L2.npy", W)
            np.save("./params/b_L2.npy", b)
        else:
            np.save("./params/W_L1.npy", W)
            np.save("./params/b_L1.npy", b)

        # Check model accuracy on test data
        if it % 5 == 0:
            scores, losses = list(), list()
            # Average accuracy and loss over 10 random test batches
            for _ in range(10):
                X, Y = get_batch(test_X, test_Y)
                Y_out = np.argmax(forward(W, b, X), axis=1)
                
                # Compute loss on X
                losses.append(loss(W, b, X, Y))
                
                # Compute accuracy on test data
                Y = np.argmax(Y, axis=1)
                scores.append(evaluate(Y, Y_out))
                
            print("Iter {}\tAccuracy {}%\t Loss {}".format(it, np.round(np.mean(scores)), np.mean(losses)))
            
Example #5
if __name__ == "__main__":
    # Load classifier parameters
    W = np.load("./params/W.npy")
    b = np.load("./params/b.npy")

    # Load data
    train_X, train_Y, test_X, test_Y = load_data()

    # Sample a random batch from the *test data*
    X, Y = get_batch(test_X, test_Y)

    # First compute gradients
    grad = gradients(W, b, X, Y)
    Y = np.argmax(Y, axis=1)

    # 0. original example (not an attack!)
    Y_hat_original = np.argmax(forward(W, b, X), axis=1)

    # 1. fast-gradient sign method (FGSM)
    X_fgsm = fgsm(X, grad["dX"], EPSILON)
    Y_hat_fgsm = np.argmax(forward(W, b, X_fgsm), axis=1)

    # Print adversarial examples that cause the classifier to change output
    diffs = list()
    for i in range(BATCH_SIZE):
        if Y_hat_original[i] != Y_hat_fgsm[i]:
            print("output on original: {}\noutput on adversar: {}\n".format(
                Y_hat_original[i], Y_hat_fgsm[i]))
            plot_image(X[i].reshape((28, 28)))
            plot_image(X_fgsm[i].reshape((28, 28)))
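plot_image() is likewise not part of the excerpt; a minimal matplotlib-based sketch (the implementation is an assumption):

import matplotlib.pyplot as plt

def plot_image(image):
    """ Display a single 28x28 grayscale image """
    plt.imshow(image, cmap="gray")
    plt.axis("off")
    plt.show()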