示例#1
0
def get_2d_intuition_data(n_samples=100, seed=10, L=10, C=1., **kwargs):
    """Return toy data with the labels of the ``L`` highest-scoring points flipped.

    A linear SVM is trained on the data from ``get_toy_data`` and its primal
    weight vector is rebuilt from the dual solution.  Every sample is then
    ranked by the absolute decision value ``|w . x + b|`` and the labels of
    the ``L`` samples with the largest values are negated.

    Args:
        n_samples: Forwarded to ``get_toy_data``.
        seed: Forwarded to ``get_toy_data``.
        L: Number of labels to flip.
        C: Second positional argument given to ``svm.SVMTrainer``.
        **kwargs: Accepted and ignored.

    Returns:
        Tuple ``(X, y_p, flip_pnts)``: the samples, a copy of the labels with
        the selected entries negated, and the rows of ``X`` that were flipped.
    """
    X, y = get_toy_data(n_samples=n_samples, seed=seed)
    predictor = svm.SVMTrainer('linear', C).train(X, y, remove_zero=False)
    alpha, b = predictor._weights, predictor._bias

    # Primal weights: sum over support vectors of alpha_i * label_i * x_i.
    signed_alpha = (alpha * predictor._support_vector_labels).reshape(-1, 1)
    weight = (signed_alpha * predictor._support_vectors).sum(axis=0)

    # Rank samples by |w . x + b|, largest first, and keep the first L indices.
    dist = np.abs(np.dot(X, weight.T) + b)
    flip_inds = np.argsort(dist)[::-1][:L]

    y_p = y.copy()
    y_p[flip_inds] *= -1
    return X, y_p, X[flip_inds]
示例#2
0
文件: data.py 项目: luzai/opt18
def get_adv_data(n_samples=100,
                 seed=16,
                 C=1.,
                 R=25,
                 beta1=0.1,
                 beta2=0.1,
                 L=10,
                 **kwargs):
    """Search ``R`` random rounds for the label flips that hurt a linear SVM most.

    A linear SVM is trained on the clean toy data.  In each round a predictor
    with uniformly random dual weights and bias is built on the same support
    vectors, every sample gets the score
    ``v = alpha / C - beta1 * s - beta2 * q`` (``s`` and ``q`` are the
    normalised, label-signed scores of the trained and the random predictor),
    and the labels of the ``L`` samples with the smallest ``v`` are negated.
    A fresh SVM is trained on the tainted labels and its error against the
    clean labels is recorded.  The round with the highest error wins; on ties
    the earliest round is kept.

    The random predictors are drawn from NumPy's global random state, which
    this function does not seed itself.

    Args:
        n_samples: Forwarded to ``get_toy_data``.
        seed: Forwarded to ``get_toy_data``.
        C: Second positional argument given to ``svm.SVMTrainer``; also the
            half-width of the uniform range for the random weights and bias.
        R: Number of random rounds; must be at least 1.
        beta1: Weight of the trained predictor's score in ``v``.
        beta2: Weight of the random predictor's score in ``v``.
        L: Number of labels flipped per round.
        **kwargs: Accepted and ignored.

    Returns:
        Tuple ``(X, y_p, flip_pnts)`` from the round with the highest error:
        the samples, the tainted labels, and the rows of ``X`` that were
        flipped.

    Raises:
        ValueError: If ``R`` is smaller than 1, since no round would run and
            there would be no result to select.
    """
    if R < 1:
        raise ValueError('R must be at least 1, got %r' % (R,))

    X, y = get_toy_data(
        n_samples=n_samples,
        seed=seed,
    )
    trainer = svm.SVMTrainer('linear', C)
    predictor = trainer.train(X, y, remove_zero=False)
    alpha = predictor._weights

    s = predictor.score(X)
    s *= predictor._support_vector_labels
    s = normalize(s)

    error_hist = []
    yp_hist = []
    flip_pnts_hist = []
    print('training error on untainted data is ', calc_error(s, y))

    for _ in range(R):
        # Random classifier on the same support vectors as the trained one.
        alpha_rnd = np.random.uniform(-C, C, size=alpha.shape)
        b_rnd = np.random.uniform(-C, C)
        predictor_rnd = svm.SVMPredictor(
            weights=alpha_rnd,
            support_vectors=predictor._support_vectors,
            support_vector_labels=predictor._support_vector_labels,
            bias=b_rnd,
            sigma=predictor._sigma,
            kernel=predictor._kernel)
        print('training error of random svm is ',
              calc_error(predictor_rnd.predict(X), y))

        q = predictor_rnd.score(X)
        q *= predictor._support_vector_labels
        q = normalize(q)

        # Flip the L samples with the smallest combined score.
        v = alpha / C - beta1 * s - beta2 * q
        flip_inds = np.argsort(v, axis=0)[0:L]
        y_p = y.copy()
        y_p[flip_inds] *= -1

        # Evaluate once per round; the same value is printed and recorded.
        predictor_new = trainer.train(X, y_p)
        tainted_error = calc_error(predictor_new.predict(X), y)
        print('training error on tainted data  is ', tainted_error)

        error_hist.append(tainted_error)
        yp_hist.append(y_p)
        flip_pnts_hist.append(X[flip_inds])

    print(np.max(error_hist), error_hist)
    best_round = int(np.argmax(error_hist))
    return X, yp_hist[best_round], flip_pnts_hist[best_round]
示例#3
0
# from lz import *
import logging, numpy as np, matplotlib.pyplot as plt
logging.root.setLevel(logging.ERROR)
import svm, data

# Experiment selector: only the branch whose name matches `exp` is executed.
exp = 'proc'
if exp == 'proc':
    # Train an RBF SVM on the 'proc-train' file and write the predictions for
    # 'proc-test' to 'y-pred'. Both inputs are space-delimited text files.
    C = 1.
    kernel = 'rbf'
    proc_train = np.loadtxt('proc-train', delimiter=' ')
    # Column 0 is used as the label vector, the remaining columns as features.
    X, y = proc_train[:, 1:], proc_train[:, 0]
    X_test = np.loadtxt('proc-test', delimiter=' ')
    trainer = svm.SVMTrainer(kernel, C)
    predictor = trainer.train(X, y, remove_zero=True)
    y_pred = predictor.predict(X_test)
    np.savetxt('y-pred', y_pred, delimiter=' ')

elif exp == 'sonar':
    # Train on the sonar data and print the error on the held-out split.
    n_samples = 208
    n_features = 60
    C = 1.
    # Handed to data.apply_rand_flip below; presumably the number of labels
    # to flip (one tenth of the 208 samples) - confirm against data.py.
    L = 208 // 10
    kernel = 'rbf'

    X, y = data.get_sonar_data()
    X, y, X_val, y_val = data.split_train_test(X, y)
    trainer = svm.SVMTrainer(kernel, C)
    predictor = trainer.train(X, y, remove_zero=True)
    print(predictor.error(X_val, y_val))

    # The third return value of apply_rand_flip is discarded here.
    X, y, _ = data.apply_rand_flip(X, y, L)
import numpy as np
import pandas as pd
import svm

# Read the comma-separated 'wdbc.data' file; it has no header row, so pandas
# labels the columns 0..n-1.
data = pd.read_table('wdbc.data', sep=',', header=None)
# DataFrame.ix no longer exists in current pandas. Because the column labels
# are 0..n-1, positional .iloc selects exactly the columns .ix used to:
# column 1 holds the class label, columns 2 onwards hold the features.
X = np.array(data.iloc[:, 2:])
y = data.iloc[:, 1]
# Encode the class label: 'B' becomes +1.0, anything else becomes -1.0.
y = np.array([1.0 if label == 'B' else -1.0 for label in y]).ravel()
n_samples, n_features = X.shape

c = 0.1
trainer = svm.SVMTrainer(svm.Kernel.linear(), c)
predictor = trainer.train(X, y)

# Predict every training sample, one (1, n_features) row at a time.
test = X[0].reshape(1, n_features)
predictions = [
    predictor.predict(sample.reshape(1, n_features)) for sample in X
]
示例#5
0
import matplotlib.pyplot as plt
import svm
from kernels import Kernel

# Load iris and turn it into a two-class problem: target 0 is relabelled -1,
# target 1 is kept, and samples with target 2 are dropped.
iris = datasets.load_iris()
y = iris.target
y = np.asarray([-1 if x == 0 else x for x in y])
# Use the boolean mask itself as the index. Wrapping it in a list
# (`[y != 2]`) is no longer interpreted as a mask by current NumPy.
sel = y != 2
y = y[sel]
# Only the first two feature columns are used, so the model lives in 2-D.
X = iris.data[:, :2]
X = X[sel]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

C = 0.1
trainer = svm.SVMTrainer(kernel=Kernel.linear(), c=C)
model = trainer.train(X_train, y_train)

# Regular grid with step h covering the data range plus a margin of 1.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
h = 0.02
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# One (x, y) row per grid node.
xxyy = np.stack((xx.ravel(), yy.ravel()), axis=-1)

# The model is queried one grid point at a time.
result = [model.predict(point) for point in xxyy]

Z = np.array(result).reshape(xx.shape)
        result.append(predictor.predict(point))

    Z = np.array(result).reshape(xx.shape)

    plt.contourf(xx, yy, Z,
                 cmap=cm.Paired,
                 levels=[-0.001, 0.001],
                 extend='both',
                 alpha=0.5)
    plt.scatter(flatten(X[:, 0]), flatten(X[:, 1]),
                c=flatten(y), cmap=cm.Paired)
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    plt.savefig(filename)


# Demo configuration: sample count, feature count, and the grid size handed
# to plot().
num_samples=100
num_features=2
grid_size=20

# Random data
# Standard-normal draws arranged as a (num_samples, num_features) matrix.
samples = np.matrix(np.random.normal(size=num_samples * num_features)
                    .reshape(num_samples, num_features))
# Label is +1.0 where a row's feature sum is positive, otherwise -1.0.
labels = 2 * (samples.sum(axis=1) > 0) - 1.0

# Train and predict
# Gaussian kernel built with argument 0.5; 0.1 is the trainer's second
# positional argument (presumably the soft-margin constant - confirm in svm).
trainer = svm.SVMTrainer(svm.Kernel.gaussian(0.5), 0.1)
predictor = trainer.train(samples, labels)

# plot() is defined elsewhere in this file; the last argument is the output
# file name.
plot(predictor, samples, labels, grid_size, "svm-demo.pdf")
示例#7
0
def test_on_dataset(dataset = 'sonar', kernel = 'rbf', n_samples = 208, n_features = 60):
    """Plot 5-fold validation accuracy against the amount of flipped labels.

    One axes is added to a single figure for each C in [1, 10, 100, 1000].
    For every flip count the data is reloaded and split again, each of the
    five folds is held out once, the remaining folds are passed through
    ``data.apply_rand_flip`` and three SVM trainers are fitted on the result:
    the default one, ``ln_robust=True, mu=0.1`` and
    ``ln_robust=True, mu=0.5 - 1e-4``.  The accuracy ``1 - error`` on the
    held-out fold is averaged over the folds and drawn as one curve per
    trainer.

    The figure is neither shown nor saved here, and nothing is returned.

    Args:
        dataset: Name handed to ``data.get_train_data``; also the figure title.
        kernel: First positional argument of ``svm.SVMTrainer``.
        n_samples: Handed to ``data.get_train_data``; the flip counts are
            derived from it.
        n_features: Handed to ``data.get_train_data``.
    """
    fold_count = 5
    # 0 plus roughly 10 %, 20 %, 30 % and 40 % of n_samples.
    flip_counts = [0, n_samples//10, n_samples//5, int(n_samples//3.3333), int(n_samples//2.5)]
    # (extra SVMTrainer keyword arguments, line colour, legend label) per curve.
    variants = (
        ({}, "blue", 'mu=0'),
        ({'ln_robust': True, 'mu': 0.1}, "red", 'mu=0.1'),
        ({'ln_robust': True, 'mu': 0.5 - 1e-4}, "black", 'mu=0.5'),
    )

    fig = plt.figure(figsize=(8,3))
    for panel, c_value in enumerate([1, 10, 100, 1000], start=1):
        ax = fig.add_axes([0.25*panel-0.20, 0.3,0.18,0.4])

        curves = [[] for _ in variants]
        for n_flips in flip_counts:
            X_all, y_all = data.get_train_data(dataset, n_samples, n_features)
            X_folds, y_folds, = data.split_train_test(X_all, y_all)

            totals = [0 for _ in variants]
            for held_out in range(fold_count):
                # Training set: every fold except the held-out one, in order.
                train_ids = [j for j in range(fold_count) if j != held_out]
                X = np.array([row for j in train_ids for row in X_folds[j]])
                y = np.array([label for j in train_ids for label in y_folds[j]])
                X_val = np.array(X_folds[held_out])
                y_val = np.array(y_folds[held_out])
                X, y, flip_pnts = data.apply_rand_flip(X, y, n_flips)

                # All trainers see the same flipped training set.
                for slot, (trainer_kwargs, _color, _label) in enumerate(variants):
                    trainer = svm.SVMTrainer(kernel, c_value, **trainer_kwargs)
                    predictor = trainer.train(X, y, remove_zero=True)
                    totals[slot] += (1-predictor.error(X_val, y_val))

            for curve, total in zip(curves, totals):
                curve.append(total / fold_count)

        curves = [np.array(curve) for curve in curves]
        flip_ratio = np.linspace(0, 40,curves[0].shape[0])
        plt.ylim((0.3,1))
        print(curves[0])
        for curve, (_kwargs, color, label) in zip(curves, variants):
            ax.plot(flip_ratio, curve, color=color, label=label)
        ax.set_xlabel('% flipped labels')
        ax.set_ylabel('test acc')
        ax.set_title("C = %d" % c_value)
        for side in ('right', 'top', 'left', 'bottom'):
            ax.spines[side].set_color('black')
        ax.patch.set_facecolor("white")
        ax.grid(color='r', linestyle='--',linewidth=1, alpha=0.3)
    fig.suptitle(dataset, fontsize=12)
示例#8
0
            num_samples, num_features))
    y = np.ravel(2 * (X.sum(axis=1) > 0) - 1.0)
    linearly_separable = (X, y)

    names = [
        "SVM Linear c = 1", "SVM RBF c = 1", "SVM Linear c = 100",
        "SVM RBF c = 100"
    ]
    datasets = [
        make_moons(noise=0.1, random_state=0),
        make_circles(noise=0.1, factor=0.5, random_state=1),
        linearly_separable,
    ]

    classifiers = [
        svm.SVMTrainer(kernel=linear(), c=1),
        svm.SVMTrainer(kernel=radial_basis(gamma=1), c=1),
        svm.SVMTrainer(kernel=linear(), c=100),
        svm.SVMTrainer(kernel=radial_basis(gamma=1), c=100),
    ]

    print(linear())

    figure = plt.figure(figsize=(27, 9))
    i = 1
    for ds_cnt, ds in enumerate(datasets):
        X, y = ds
        ## modify labels to correspond to SVM categories {-1, 1}
        y[y == 0] = -1

        X = StandardScaler().fit_transform(X)
示例#9
0
import svm
import numpy as np
import cvxopt
import plotFigure
"""
Some functions
"""

# Four hand-picked 2-D training points: the first two are labelled -1, the
# last two +1.
X = np.array([[1, 2], [2, 2], [0, 0], [-2, 3]])
Y = np.array([-1., -1., 1., 1.])

# Gaussian-kernel SVM built with kernel argument 5 and c=200.
trainer = svm.SVMTrainer(svm.Kernel.gaussian(5), c=200)
predictor = trainer.train(X, Y)
plotFigure.plot(predictor, X, Y, grid_size=100, filename='TEST.pdf')

# print() with a single argument is valid on Python 2 and Python 3; the
# former print statement is a SyntaxError on Python 3.
print(predictor.predict(np.array([0, 1])))