def do_comparison(X_train, y_train, X_test, y_test, dataset):
    # evaluate both SVMs on a given dataset, generate plots
    Cs = 10. ** np.arange(-4, 1)
    multisvm = MultiSVM()
    svm = OneSlackSSVM(MultiClassClf(), tol=0.01)

    accs_pystruct, times_pystruct = eval_on_data(X_train, y_train, X_test,
                                                 y_test, svm, Cs=Cs)
    accs_svmstruct, times_svmstruct = eval_on_data(X_train, y_train, X_test,
                                                   y_test, multisvm, Cs=Cs)

    plot_curves(times_svmstruct, times_pystruct, Cs=Cs,
                title="learning time (s) %s" % dataset,
                filename="times_%s.pdf" % dataset)
    plot_curves(accs_svmstruct, accs_pystruct, Cs=Cs,
                title="accuracy %s" % dataset,
                filename="accs_%s.pdf" % dataset)
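# NOTE: do_comparison relies on two helpers, eval_on_data and plot_curves,
# that are not shown here. Below is a minimal sketch of what eval_on_data
# could look like -- an assumption, not the original implementation; it
# assumes the learner exposes scikit-learn-style fit/score and a C attribute.
import time

def eval_on_data(X_train, y_train, X_test, y_test, learner, Cs):
    # refit once per value of C, recording test accuracy and wall-clock time
    accuracies, times = [], []
    for C in Cs:
        learner.C = C  # assumed: the learner stores C as a plain attribute
        start = time.time()
        learner.fit(X_train, y_train)
        times.append(time.time() - start)
        accuracies.append(learner.score(X_test, y_test))
    return accuracies, times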
def test_class_weights_rescale_C():
    # check that our crammer-singer implementation with class weights and
    # rescale_C=True is the same as LinearSVC's c-s class_weight implementation
    raise SkipTest("class weight test needs update")
    from sklearn.svm import LinearSVC
    X, Y = make_blobs(n_samples=210, centers=3, random_state=1,
                      cluster_std=3, shuffle=False)
    X = np.hstack([X, np.ones((X.shape[0], 1))])
    X, Y = X[:170], Y[:170]

    weights = len(Y) / (np.bincount(Y) * len(np.unique(Y)))
    pbl_class_weight = MultiClassClf(n_features=3, n_classes=3,
                                     class_weight=weights, rescale_C=True)
    svm_class_weight = OneSlackSSVM(pbl_class_weight, C=10, tol=1e-5)
    svm_class_weight.fit(X, Y)

    try:
        linearsvm = LinearSVC(multi_class='crammer_singer',
                              fit_intercept=False, class_weight='balanced',
                              C=10)
        linearsvm.fit(X, Y)
        assert_array_almost_equal(svm_class_weight.w, linearsvm.coef_.ravel(),
                                  3)
    except TypeError:
        # travis has a really old sklearn version that doesn't support
        # class_weight in LinearSVC
        pass
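# Side note (not part of the original test): the weights formula above,
# len(Y) / (np.bincount(Y) * len(np.unique(Y))), is scikit-learn's
# 'balanced' class-weight heuristic and can equivalently be computed with
# compute_class_weight; a tiny self-contained check:
from sklearn.utils.class_weight import compute_class_weight

y_demo = np.array([0, 0, 0, 1, 1, 2])  # illustrative labels
manual = len(y_demo) / (np.bincount(y_demo) * len(np.unique(y_demo)))
balanced = compute_class_weight('balanced', classes=np.unique(y_demo),
                                y=y_demo)
assert np.allclose(manual, balanced)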
def crammer_singer_classifier(X_train_bias, y_train, num_classes, n_jobs=2,
                              C=1):
    model = MultiClassClf(n_features=X_train_bias.shape[1],
                          n_classes=num_classes)
    # n-slack cutting plane ssvm
    n_slack_svm = NSlackSSVM(model, n_jobs=n_jobs, verbose=0,
                             check_constraints=False, C=C,
                             batch_size=100, tol=1e-2)
    n_slack_svm.fit(X_train_bias, y_train)
    return n_slack_svm
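# Usage sketch for crammer_singer_classifier on the digits data. The split
# and num_classes below are illustrative; it assumes pystruct learners
# expose a scikit-learn-style score method.
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

digits = load_digits()
X, y = digits.data / 16., digits.target
X = np.hstack([X, np.ones((X.shape[0], 1))])  # constant 1 feature for bias
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)

clf = crammer_singer_classifier(X_tr, y_tr, num_classes=10)
print("test accuracy: %.3f" % clf.score(X_te, y_te))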
def test_simple_1d_dataset_cutting_plane():
    # 30 1d datapoints between 0 and 1
    X = np.random.uniform(size=(30, 1))
    Y = (X.ravel() > 0.5).astype(int)
    # we have to add a constant 1 feature by hand :-/
    X = np.hstack([X, np.ones((X.shape[0], 1))])

    pbl = MultiClassClf(n_features=2)
    svm = NSlackSSVM(pbl, check_constraints=True, C=10000)

    svm.fit(X, Y)
    assert_array_equal(Y, np.hstack(svm.predict(X)))
def test_equal_class_weights():
    # test that equal class weight is the same as no class weight
    X, Y = make_blobs(n_samples=80, centers=3, random_state=42)
    X = np.hstack([X, np.ones((X.shape[0], 1))])
    X_train, X_test, Y_train, Y_test = X[:40], X[40:], Y[:40], Y[40:]

    pbl = MultiClassClf(n_features=3, n_classes=3)
    svm = OneSlackSSVM(pbl, C=10)

    svm.fit(X_train, Y_train)
    predict_no_class_weight = svm.predict(X_test)

    pbl_class_weight = MultiClassClf(n_features=3, n_classes=3,
                                     class_weight=np.ones(3))
    svm_class_weight = OneSlackSSVM(pbl_class_weight, C=10)
    svm_class_weight.fit(X_train, Y_train)
    predict_class_weight = svm_class_weight.predict(X_test)

    assert_array_equal(predict_no_class_weight, predict_class_weight)
    assert_array_almost_equal(svm.w, svm_class_weight.w)
def test_blobs_2d_subgradient():
    # make three gaussian blobs
    X, Y = make_blobs(n_samples=80, centers=3, random_state=42)
    # we have to add a constant 1 feature by hand :-/
    X = np.hstack([X, np.ones((X.shape[0], 1))])
    X_train, X_test, Y_train, Y_test = X[:40], X[40:], Y[:40], Y[40:]

    pbl = MultiClassClf(n_features=3, n_classes=3)
    svm = SubgradientSSVM(pbl, C=1000)

    svm.fit(X_train, Y_train)
    assert_array_equal(Y_test, np.hstack(svm.predict(X_test)))
def test_blobs_2d_cutting_plane():
    # make three gaussian blobs
    X, Y = make_blobs(n_samples=80, centers=3, random_state=42)
    # we have to add a constant 1 feature by hand :-/
    X = np.hstack([X, np.ones((X.shape[0], 1))])
    X_train, X_test, Y_train, Y_test = X[:40], X[40:], Y[:40], Y[40:]

    pbl = MultiClassClf(n_features=3, n_classes=3)
    svm = NSlackSSVM(pbl, check_constraints=True, C=1000, batch_size=1)

    svm.fit(X_train, Y_train)
    assert_array_equal(Y_test, np.hstack(svm.predict(X_test)))
def test_class_weights():
    X, Y = make_blobs(n_samples=210, centers=3, random_state=1,
                      cluster_std=3, shuffle=False)
    X = np.hstack([X, np.ones((X.shape[0], 1))])
    X, Y = X[:170], Y[:170]

    pbl = MultiClassClf(n_features=3, n_classes=3)
    svm = OneSlackSSVM(pbl, C=10)
    svm.fit(X, Y)

    # inverse-frequency class weights, normalized to sum to n_classes
    weights = 1. / np.bincount(Y)
    weights *= len(weights) / np.sum(weights)
    pbl_class_weight = MultiClassClf(n_features=3, n_classes=3,
                                     class_weight=weights)
    svm_class_weight = OneSlackSSVM(pbl_class_weight, C=10)
    svm_class_weight.fit(X, Y)

    assert_greater(f1_score(Y, svm_class_weight.predict(X), average='macro'),
                   f1_score(Y, svm.predict(X), average='macro'))
# Loss = toeplitz(np.arange(n_classes))
# uniform off-diagonal loss matrix (alternative: the toeplitz ordinal
# loss commented out above)
Loss = np.ones((n_classes, n_classes))
np.fill_diagonal(Loss, 0.0)
# cond_loss = np.dot(Loss, np.expand_dims(prob, 1))
# opt = np.argmin(cond_loss)
# print(cond_loss)
# print("OPTIMAL IS %f", opt)

# we add a constant 1 feature for the bias
X_train_bias = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_test_bias = np.hstack([X_test, np.ones((X_test.shape[0], 1))])

model = MultiClassClf(n_features=X_train_bias.shape[1],
                      n_classes=n_classes, Loss=Loss)
gmodel = GeneralizedMultiClassClf(n_features=X_train_bias.shape[1],
                                  n_classes=n_classes, Loss=Loss)

method = 'generalized'
# method = 'vanilla'

Cs = [1.]
# Cs = [6.5, 7., 7.5]
for C in Cs:
    fw_bc_svm = FrankWolfeSSVM(model, C=C, max_iter=300)
def test_crammer_singer_model_class_weight():
    X, Y = make_blobs(n_samples=80, centers=3, random_state=42)
    # we have to add a constant 1 feature by hand :-/
    X = np.hstack([X, np.ones((X.shape[0], 1))])

    pbl = MultiClassClf(n_features=3, n_classes=3, class_weight=[1, 2, 1])
    rng = np.random.RandomState(0)
    w = rng.uniform(size=pbl.size_joint_feature)

    # test inference energy
    x = X[0]
    y, energy = pbl.inference(x, w, return_energy=True)
    assert_almost_equal(energy, np.dot(w, pbl.joint_feature(x, y)))

    # test inference result:
    energies = [np.dot(w, pbl.joint_feature(x, y_hat)) for y_hat in range(3)]
    assert_equal(np.argmax(energies), y)

    # test loss_augmented inference energy
    y, energy = pbl.loss_augmented_inference(x, Y[0], w, return_energy=True)
    assert_almost_equal(energy, np.dot(w, pbl.joint_feature(x, y))
                        + pbl.loss(Y[0], y))

    # test batch versions
    Y_batch = pbl.batch_inference(X, w)
    Y_ = [pbl.inference(x, w) for x in X]
    assert_array_equal(Y_batch, Y_)

    Y_batch = pbl.batch_loss_augmented_inference(X, Y, w)
    Y_ = [pbl.loss_augmented_inference(x, y, w) for x, y in zip(X, Y)]
    assert_array_equal(Y_batch, Y_)

    loss_batch = pbl.batch_loss(Y, Y_)
    loss = [pbl.loss(y, y_) for y, y_ in zip(Y, Y_)]
    assert_array_equal(loss_batch, loss)
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits

from pystruct.models import MultiClassClf
from pystruct.learners import FrankWolfeSSVM

digits = load_digits()
X_train, y_train = digits.data, digits.target
X_train = X_train / 16.
y_train = y_train.astype(int)

model = MultiClassClf()
bcfw = FrankWolfeSSVM(model=model, C=.1, max_iter=1000, tol=0.1, verbose=3,
                      check_dual_every=10)
batch_fw = FrankWolfeSSVM(model=model, C=.1, max_iter=1000, batch_mode=True,
                          tol=0.1, verbose=3, check_dual_every=10)

bcfw.fit(X_train, y_train)
batch_fw.fit(X_train, y_train)

# duality gap = primal - dual; assumes FrankWolfeSSVM recorded
# primal_objective_curve_ / objective_curve_ during fit (check_dual_every=10)
d_gapBCFW = (np.array(bcfw.primal_objective_curve_)
             - np.array(bcfw.objective_curve_))

itr = [i * 10 for i in range(len(d_gapBCFW))]
plt.plot(itr, d_gapBCFW, 'go-', label='BCFW', linewidth=2)
plt.title('BCFW duality gap every 10 iterations')
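# The script fits batch_fw as well but only plots the BCFW gap; overlaying
# the batch Frank-Wolfe gap is a natural follow-up, under the same
# assumption that the objective curves were recorded during fit:
d_gapBatch = (np.array(batch_fw.primal_objective_curve_)
              - np.array(batch_fw.objective_curve_))
plt.plot([i * 10 for i in range(len(d_gapBatch))], d_gapBatch, 'bs-',
         label='batch FW', linewidth=2)
plt.xlabel('iteration')
plt.ylabel('duality gap')
plt.legend()
plt.show()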
# Add the heart rate features
x = np.append(x, window_with_timestamp_and_label[-2])
X = np.append(X, np.reshape(x, (1, -1)), axis=0)
y = np.append(y, label)

print("Finished feature extraction over {} windows".format(len(X)))
print("Unique labels found: {}".format(set(y)))
sys.stdout.flush()

y = y.astype(int)
model = MultiClassClf(n_features=X.shape[1], n_classes=2)

print("\n")
print("---------------------- Grid Search -------------------------")

# grid search
param_grid = [
    {'C': [0.075, 0.05, 0.1],
     'batch_size': [100, 1000, 150],
     'tol': [1e-2, -10],
     'inactive_window': [50, 100, 10],
     'inactive_threshold': [1e-5, 1e-4, 1e-6]}
]
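# The grid search itself is not shown. A minimal sketch of running it with
# scikit-learn's GridSearchCV over NSlackSSVM (whose parameters param_grid
# matches), assuming pystruct learners follow the scikit-learn estimator
# API so they can be cloned; cv and n_jobs are illustrative choices:
from sklearn.model_selection import GridSearchCV
from pystruct.learners import NSlackSSVM

base_svm = NSlackSSVM(model, verbose=0)
search = GridSearchCV(base_svm, param_grid, cv=3, n_jobs=-1)
search.fit(X, y)

print("best params: {}".format(search.best_params_))
print("best CV score: {:.3f}".format(search.best_score_))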