def fit(self, x, y_multiclass, kernel=poly(1), C=0.001):
        """Train all ``self.n_svm`` binary SVMs by solving their dual problems.

        For each SVM ``k`` a cvxpy problem is built over the multiplier
        variable ``self.a[k]``: maximize ``sum(a) - 1/2 * a^T G a`` subject to
        ``a <= C`` and ``sum(a * y) == 0``.  After solving, the bias
        ``self.b[k, 0]`` is set from the extreme values of ``self.wTx`` on the
        samples labelled +1 and -1.

        Args:
            x: Sample matrix, one row per sample.  It is converted to a
                float64 ``sparse.COO`` and stored on ``self.x``; the original
                ``x`` is still used for row selection when computing the bias.
            y_multiclass: Multiclass labels; flattened to 1-D float64.
            kernel: Callable applied as ``kernel(yx, yx)`` to build ``G``.
            C: Upper bound on every multiplier.

        Raises:
            AssertionError: If the objective is not DCP.

        Side effects:
            Sets ``self.x``, ``self.m``, ``self.y_multiclass``, ``self.kernel``,
            ``self.C``, ``self.y_matrix``, ``self.b`` and ``self.a_matrix``.
        """
        y_multiclass = y_multiclass.reshape(-1).astype(np.float64)
        self.x = sparse.COO(x.astype(np.float64))
        self.m = self.x.shape[0]
        self.y_multiclass = y_multiclass
        self.kernel = kernel
        self.C = C

        # One label row per SVM.  NOTE(review): self.cast presumably maps the
        # multiclass labels to +1/-1 (and 0 for unused classes) -- confirm.
        self.y_matrix = sparse.stack(
            [sparse.COO(self.cast(y_multiclass, k)) for k in range(self.n_svm)],
            0)

        # Small diagonal term added to every Gram matrix; it only depends on
        # self.m, so build it once.  Presumably this keeps G numerically
        # positive semidefinite for cp.quad_form -- TODO confirm.
        compensate = (sparse.eye(self.m) * 1e-7).astype(np.float64)

        for k in range(self.n_svm):
            print("training ",k,"th SVM in ",self.n_svm)
            y = self.y_matrix[k, :].reshape((-1, 1))
            y_dense = y.todense()  # reused for the constraint and the masks
            yx = y * self.x
            # NOTE(review): applying the kernel to label-scaled inputs equals
            # y_i * y_j * K(x_i, x_j) only for some kernels -- verify for the
            # kernels actually used.
            G = kernel(yx, yx) + compensate  # Gram matrix
            objective = cp.Maximize(
                cp.sum(self.a[k]) - (1/2) * cp.quad_form(self.a[k], G.tocsr()))

            if not objective.is_dcp():
                print("Not solvable!")
                # Explicit raise instead of ``assert`` so the check survives
                # ``python -O``; the exception type is unchanged.
                raise AssertionError("objective is not DCP")

            # Box constraint on the multipliers plus the equality constraint.
            constraints = [self.a[k] <= C,
                           cp.sum(cp.multiply(self.a[k], y_dense)) == 0]
            prob = cp.Problem(objective, constraints)
            prob.solve()

            x_pos = x[y_dense[:, 0] == 1, :]
            x_neg = x[y_dense[:, 0] == -1, :]
            # A missing class contributes 0 to the bias estimate.
            b_min = -np.min(self.wTx(k, x_pos)) if x_pos.shape[0] != 0 else 0
            b_max = -np.max(self.wTx(k, x_neg)) if x_neg.shape[0] != 0 else 0
            self.b[k, 0] = (1/2) * (b_min + b_max)

        # Gather the solved multipliers, one row per SVM.
        self.a_matrix = np.stack([i.value.reshape(-1) for i in self.a], 0)
        self.a_matrix = sparse.COO(self.a_matrix)
示例#2
0
def main():
    """Train and evaluate one SVM per kernel on the ionosphere data set.

    Reads ``ionosphere.data`` (comma separated, label in the last column),
    uses the first 200 rows for training and the rest for testing, then for
    each kernel in turn trains a classifier with ``svm_train``, evaluates it
    with ``evaluate_classifier`` and prints the summary.

    Raises:
        ValueError: If the number of feature rows and labels differ.
    """
    data_file = 'ionosphere.data'

    data = np.genfromtxt(data_file, delimiter=',', dtype='|S10')
    instances = np.array(data[:, :-1], dtype='float')
    # Fields are read as byte strings ('|S10'), so compare against bytes;
    # a str literal never matches on Python 3.
    labels = np.array(data[:, -1] == b'g', dtype='int')

    n, d = instances.shape
    nlabels = labels.size

    if n != nlabels:
        raise ValueError('Expected same no. of feature vector as no. of labels')

    train_data = instances[:200]  # first 200 examples
    train_labels = labels[:200]  # first 200 labels

    test_data = instances[200:]  # example 201 onwards
    test_labels = labels[200:]  # label 201 onwards

    # parameter for the kernels we'll use
    gamma = 1.0 / d

    kernel_dict = {
        'linear': ker.linear,
        'polynomial': ker.poly(degree=3, gamma=gamma),
        'rbf/gaussian': ker.rbf(gamma=gamma),
        'sigmoid/arctan': ker.sigmoid(gamma=gamma),
    }

    # Single-argument print() calls behave the same on Python 2 and 3.
    for kernel_name in sorted(kernel_dict):
        print('Training an SVM using the %s kernel...' % kernel_name)
        svm_classifier = svm_train(train_data, train_labels,
                                   kernel_dict[kernel_name])
        confusion_mat = evaluate_classifier(svm_classifier, test_data,
                                            test_labels)
        print_evaluation_summary(confusion_mat)
        print('')
示例#3
0
def main():
    """Run the ionosphere SVM experiment once for every available kernel.

    Loads ``ionosphere.data``, splits it into the first 200 rows (training)
    and the remainder (testing), and for each kernel trains via
    ``svm_train``, scores via ``evaluate_classifier`` and prints the result
    with ``print_evaluation_summary``.

    Raises:
        ValueError: If the feature matrix and label vector disagree in length.
    """
    data_file = 'ionosphere.data'

    data = np.genfromtxt(data_file, delimiter=',', dtype='|S10')
    instances = np.array(data[:, :-1], dtype='float')
    # The '|S10' dtype yields bytes; comparing with a str literal is always
    # unequal on Python 3, so use a bytes literal (equal to 'g' on Python 2).
    labels = np.array(data[:, -1] == b'g', dtype='int')

    n, d = instances.shape
    nlabels = labels.size

    if n != nlabels:
        raise ValueError('Expected same no. of feature vector as no. of labels')

    train_data = instances[:200]  # first 200 examples
    train_labels = labels[:200]  # first 200 labels

    test_data = instances[200:]  # example 201 onwards
    test_labels = labels[200:]  # label 201 onwards

    # parameter for the kernels we'll use
    gamma = 1.0/d

    kernel_dict = {'linear': ker.linear,
                   'polynomial': ker.poly(degree=3, gamma=gamma),
                   'rbf/gaussian': ker.rbf(gamma=gamma),
                   'sigmoid/arctan': ker.sigmoid(gamma=gamma)}

    # print(...) with one argument is valid and equivalent on Python 2 and 3.
    for kernel_name in sorted(kernel_dict):
        print('Training an SVM using the %s kernel...' % kernel_name)
        svm_classifier = svm_train(train_data, train_labels,
                                   kernel_dict[kernel_name])
        confusion_mat = evaluate_classifier(svm_classifier, test_data,
                                            test_labels)
        print_evaluation_summary(confusion_mat)
        print('')
    def __init__(self, m, n_class):
        """Allocate the state for one SVM per unordered pair of classes.

        Args:
            m: Number of samples; each multiplier variable has shape (m, 1).
            n_class: Number of classes; n_class * (n_class - 1) // 2 SVMs
                are created.
        """
        pair_count = n_class * (n_class - 1) // 2
        self.n_svm = pair_count
        self.m = m  # number of samples
        self.n_class = n_class

        # One positive cvxpy multiplier variable per SVM.
        self.a = [cp.Variable(shape=(m, 1), pos=True) for _ in range(pair_count)]
        # One bias per SVM.
        self.b = np.zeros((pair_count, 1))

        # Kernel function: takes x [n,d] and y [m,d], returns [n,m].
        # Example of kernels: poly(3)
        self.kernel = poly(1)

        # lookup_matrix[i, j] is the binary label SVM j should give to a
        # sample of class i (0 when SVM j does not involve class i).
        self.lookup_matrix = np.zeros((n_class, pair_count))

        # lookup_class[j] = [pos, neg]: the two classes SVM j separates.
        self.lookup_class = np.zeros((pair_count, 2))

        # Walk the class pairs (pos < neg) in order and fill both tables in
        # one pass; pos and neg always differ, so each cell is set only once.
        svm_idx = 0
        for pos in range(n_class - 1):
            for neg in range(pos + 1, n_class):
                self.lookup_class[svm_idx, 0] = pos
                self.lookup_class[svm_idx, 1] = neg
                self.lookup_matrix[pos, svm_idx] = 1.0
                self.lookup_matrix[neg, svm_idx] = -1.0
                svm_idx += 1
示例#5
0
args = parser.parse_args()

# Generate the requested synthetic data set; only the first two and last two
# of the six returned values are used here.
if args.dataset == "himoon":
    x, y, _, _, xtest, ytest = data_gen.himoon(n_samples=args.n_samples,
                                               n_dims=args.n_dims)

elif args.dataset == "mmgauss":
    x, y, _, _, xtest, ytest = data_gen.mmgauss(n_samples=args.n_samples,
                                                n_dims=args.n_dims)
else:
    raise ValueError("Unknown dataset")

kernels = {
    "rbf": k.rbf(),
    "linear": k.linear(),
    "poly": k.poly(),
    "sigmoid": k.sigmoid(),
}
# dict.get() never raises KeyError, so the former try/except fallback could
# not trigger and an unknown kernel name silently produced None.  Pass the
# fallback as the default instead.
# NOTE(review): the fallback is the string "linear", as in the original
# handler, not kernels["linear"] -- confirm which one the consumer expects.
kernel = kernels.get(args.kernel, "linear")

df = pd.DataFrame()

# NOTE(review): this message reads args.dimension while the data generators
# above use args.n_dims -- confirm both options exist on the parser.
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}: Running "
      f"{args.dataset} with {args.dimension} dimensions and "
      f"epsilon={args.epsilon} with {args.kernel} kernel for "
      f"{args.repetitions} repetitions.")

for run in range(args.repetitions):