def test_overflow_averaged():
    """Fit an averaged perceptron on the largest finite double.

    A single one-feature sample holding ``np.finfo('d').max`` is fitted for
    two iterations with ``average=True``; the learned weight must be finite.
    """
    largest_double = np.finfo('d').max
    X = np.array([[largest_double]])
    Y = np.array([-1])
    perceptron = StructuredPerceptron(model=BinaryClf(n_features=1),
                                      max_iter=2, average=True)
    perceptron.fit(X, Y)
    first_weight = perceptron.w[0]
    assert_true(np.isfinite(first_weight))
def test_constraint_removal():
    """Compare OneSlackSSVM with and without inactive-constraint removal.

    Both learners are fitted on the digits data relabelled as even/odd and
    are expected to produce nearly identical predictions, while only the
    learner without removal keeps one constraint per iteration.
    """
    digits = load_digits()
    # pixel features divided by 16
    X = digits.data / 16.
    # even vs odd as +1 vs -1
    y = 2 * (digits.target % 2) - 1

    pbl = BinaryClf(n_features=X.shape[1])

    # inactive_window=0: this learner is the "no removal" baseline
    clf_no_removal = OneSlackSSVM(model=pbl, max_iter=500, C=1,
                                  inactive_window=0, tol=0.01)
    clf_no_removal.fit(X, y)

    clf = OneSlackSSVM(model=pbl, max_iter=500, C=1, tol=0.01,
                       inactive_threshold=1e-8)
    clf.fit(X, y)

    # check that we learned something
    train_score = clf.score(X, y)
    assert_greater(train_score, .92)

    # results are mostly equal; decreasing tol would make them more similar
    pred_with_removal = clf.predict(X)
    pred_no_removal = clf_no_removal.predict(X)
    disagreement = np.mean(pred_with_removal != pred_no_removal)
    assert_less(disagreement, 0.02)

    # without removal, have as many constraints as iterations
    n_iterations_no_removal = len(clf_no_removal.objective_curve_)
    assert_equal(n_iterations_no_removal, len(clf_no_removal.constraints_))

    # with removal, there are fewer constraints than iterations
    n_constraints = len(clf.constraints_)
    assert_less(n_constraints, len(clf.objective_curve_))
def train_cue_learner(sentence_dicts, C_value):
    """Fit an n-slack SSVM cue classifier on ``sentence_dicts``.

    The cue lexicons are built from the sentences, cue feature dicts are
    extracted in 'training' mode and vectorized to a dense array, and an
    ``NSlackSSVM`` over ``BinaryClf`` is fitted with ``C=C_value``.

    Returns
    -------
    tuple
        ``(cue_ssvm, vectorizer, cue_lexicon, affixal_cue_lexicon)``
    """
    cue_lexicon, affixal_cue_lexicon = get_cue_lexicon(sentence_dicts)

    # the first element returned by the extractor is not needed here
    _, cue_instances, cue_labels = extract_features_cue(
        sentence_dicts, cue_lexicon, affixal_cue_lexicon, 'training')

    vectorizer = DictVectorizer()
    sparse_features = vectorizer.fit_transform(cue_instances)
    fvs = sparse_features.toarray()

    model = BinaryClf()
    cue_ssvm = NSlackSSVM(model, C=C_value, batch_size=-1)
    labels = np.asarray(cue_labels)
    cue_ssvm.fit(fvs, labels)

    return cue_ssvm, vectorizer, cue_lexicon, affixal_cue_lexicon
def test_simple_1d_dataset_cutting_plane():
    """NSlackSSVM must reproduce the labels of a separable 1d dataset."""
    n_samples = 30
    # 30 1d datapoints drawn uniformly (default range of np.random.uniform)
    raw = np.random.uniform(size=(n_samples, 1))
    # linearly separable labels: -1 below .5, +1 otherwise
    Y = 1 - 2 * (raw.ravel() < .5)
    # we have to add a constant 1 feature by hand :-/
    bias_column = np.ones((raw.shape[0], 1))
    X = np.hstack([raw, bias_column])

    pbl = BinaryClf(n_features=2)
    svm = NSlackSSVM(pbl, check_constraints=True, C=1000)
    svm.fit(X, Y)

    predicted = np.hstack(svm.predict(X))
    assert_array_equal(Y, predicted)
def test_blobs_2d_one_slack():
    """OneSlackSSVM trained on half of two blobs must predict the other half."""
    # make two gaussian blobs and recode the labels from {0, 1} to {-1, +1}
    points, blob_ids = make_blobs(n_samples=80, centers=2, random_state=1)
    Y = 2 * blob_ids - 1
    # we have to add a constant 1 feature by hand :-/
    X = np.hstack([points, np.ones((points.shape[0], 1))])

    # first 40 samples for training, the remaining 40 for testing
    X_train, X_test = X[:40], X[40:]
    Y_train, Y_test = Y[:40], Y[40:]

    pbl = BinaryClf(n_features=3)
    svm = OneSlackSSVM(pbl, C=1000)
    svm.fit(X_train, Y_train)

    predicted = np.hstack(svm.predict(X_test))
    assert_array_equal(Y_test, predicted)
def cue_trainer(filename, corenlp):
    """Train a cue-detection SSVM from a data file.

    The file is run through ``process_data``, converted to sentence
    dictionaries, and turned into cue feature dicts (in 'training' mode)
    that are vectorized to a dense array before an ``NSlackSSVM`` over
    ``BinaryClf`` is fitted with ``C=0.2``.

    Parameters
    ----------
    filename
        Input file, handed to ``process_data``.
    corenlp
        Passed through unchanged to ``process_data``.

    Returns
    -------
    tuple
        ``(sentence_dicts, cue_ssvm, cue_vec, cue_dict, affix_cue_dict)``
    """
    newfilename = process_data(filename, corenlp)
    sentence_dicts = file_to_sentence_dict(newfilename)
    cue_dict, affix_cue_dict = get_cue_dict(sentence_dicts)
    sentence_dicts, cue_instances, cue_labels = extract_features_cue(
        sentence_dicts, cue_dict, affix_cue_dict, 'training')

    cue_vec = DictVectorizer()
    # dense feature matrix (previously held in a local misleadingly
    # called "model")
    feature_matrix = cue_vec.fit_transform(cue_instances).toarray()

    cue_ssvm = NSlackSSVM(BinaryClf(), C=0.2, batch_size=-1)
    # Disabled alternative learner:
    # cue_ssvm = SVC(C = 0.2)
    cue_ssvm.fit(feature_matrix, cue_labels)

    # Disabled model persistence. This used to sit after the return inside
    # an unterminated triple-quoted string; kept as a plain comment so it
    # can no longer swallow the code that follows.
    # pickle.dump(cue_ssvm, open("cue_model_%s.pkl" %filename, "wb"))
    return sentence_dicts, cue_ssvm, cue_vec, cue_dict, affix_cue_dict
def test_partial_averaging():
    """Use XOR weight cycling to test partial averaging"""
    # Builtin ``float`` replaces the ``np.float`` alias, which was deprecated
    # in NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
    X = np.array([[a, b, 1] for a in (-1, 1) for b in (-1, 1)], dtype=float)
    Y = np.array([-1, 1, 1, -1])
    pcp = StructuredPerceptron(model=BinaryClf(n_features=3), max_iter=5,
                               decay_exponent=1, decay_t0=1)

    # learned weight vector for each value of the ``average`` parameter
    weight = {}
    for average in (0, 1, 4, -1):
        pcp.set_params(average=average)
        pcp.fit(X, Y)
        weight[average] = pcp.w

    # average=4 and average=-1 are expected to give the same weights
    assert_array_equal(weight[4], weight[-1])
    assert_array_almost_equal(weight[0], [1.5, 3, 0])
    assert_array_almost_equal(weight[1], [1.75, 3.5, 0])
    assert_array_almost_equal(weight[4], [2.5, 5, 0])
def test_averaging_early_stopping():
    """Test averaging over final epoch when early stopping"""
    # we use logical OR, an easy problem solved after the second epoch
    # Builtin ``float`` replaces the ``np.float`` alias, which was deprecated
    # in NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
    X = np.array([[a, b, 1] for a in (-1, 1) for b in (-1, 1)], dtype=float)
    Y = np.array([-1, 1, 1, 1])
    pcp = StructuredPerceptron(model=BinaryClf(n_features=3), max_iter=3,
                               average=-1)
    pcp.fit(X, Y)
    # The exact weight is used without the influence of the early iterations
    assert_array_equal(pcp.w, [1, 1, 1])

    # If we were expecting 3 iterations, we would end up with a zero vector
    pcp.set_params(average=2)
    pcp.fit(X, Y)
    assert_array_equal(pcp.w, [0, 0, 0])
def test_blobs_batch():
    """Batch methods of BinaryClf must agree with their per-sample versions."""
    # make two gaussian blobs and recode the labels from {0, 1} to {-1, +1}
    X, Y = make_blobs(n_samples=80, centers=2, random_state=1)
    Y = 2 * Y - 1
    pbl = BinaryClf(n_features=2)

    # test psi: batch_psi must equal the sum of the per-sample psi vectors
    batch_total = pbl.batch_psi(X, Y)
    per_sample_total = np.sum([pbl.psi(x, y) for x, y in zip(X, Y)], axis=0)
    assert_array_equal(batch_total, per_sample_total)

    # test inference
    w = np.random.uniform(-1, 1, size=pbl.size_psi)
    Y_hat = pbl.batch_inference(X, w)
    for x, y_hat in zip(X, Y_hat):
        assert_array_equal(y_hat, pbl.inference(x, w))

    # test loss-augmented inference
    Y_hat = pbl.batch_loss_augmented_inference(X, Y, w)
    for x, y, y_hat in zip(X, Y, Y_hat):
        assert_array_equal(y_hat, pbl.loss_augmented_inference(x, y, w))
def test_model_1d():
    """Check BinaryClf inference and psi on a seeded 1d dataset."""
    # 10 1d datapoints drawn uniformly (default range of np.random.uniform)
    np.random.seed(0)
    raw = np.random.uniform(size=(10, 1))
    # linearly separable labels: -1 below .5, +1 otherwise
    Y = 1 - 2 * (raw.ravel() < .5)
    pbl = BinaryClf(n_features=2)
    # we have to add a constant 1 feature by hand :-/
    X = np.hstack([raw, np.ones((raw.shape[0], 1))])

    # hand-picked weights: score is x - .5
    w = [1, -.5]
    Y_pred = np.hstack([pbl.inference(sample, w) for sample in X])
    assert_array_equal(Y, Y_pred)

    # check that sign of psi and inference agree
    for sample, label in zip(X, Y):
        score_true = np.dot(w, pbl.psi(sample, label))
        score_flipped = np.dot(w, pbl.psi(sample, -label))
        assert_true(score_true > score_flipped)

    # check that sign of psi and the sign of y correspond
    for sample, label in zip(X, Y):
        score_true = np.dot(w, pbl.psi(sample, label))
        score_flipped = np.dot(w, pbl.psi(sample, -label))
        assert_true(score_true == -score_flipped)
def test_xor():
    """Test perceptron behaviour against hand-computed values for XOR"""
    # Builtin ``float`` replaces the ``np.float`` alias, which was deprecated
    # in NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
    X = np.array([[a, b, 1] for a in (-1, 1) for b in (-1, 1)], dtype=float)
    Y = np.array([-1, 1, 1, -1])
    # Should cycle weight vectors (1, 1, -1), (0, 2, 0), (1, 1, 1), (0, 0, 0)
    # but this depends on how ties are settled.  Maybe the test can be
    # made robust to this
    # Batch version should cycle (0, 0, -2), (0, 0, 0)

    # One entry per (batch, average) combination, in the order produced by
    # product((False, True), (False, True)) below.
    expected_predictions = [
        np.array([1, 1, 1, 1]),      # online, no average, w = (0, 0, 0, 0)
        np.array([-1, 1, -1, 1]),    # online, average, w ~= (0.5, 1, 0)
        np.array([1, 1, 1, 1]),      # batch, no average, w = (0, 0, 0)
        np.array([-1, -1, -1, -1]),  # batch, average, w ~= (0, 0, -2)
    ]
    pcp = StructuredPerceptron(model=BinaryClf(n_features=3), max_iter=2)
    settings = product((False, True), (False, True))
    for pred, (batch, average) in zip(expected_predictions, settings):
        pcp.set_params(batch=batch, average=average)
        pcp.fit(X, Y)
        # We don't compare w explicitly but its prediction. As the perceptron
        # is invariant to the scaling of w, this will allow the optimization
        # of the underlying implementation
        assert_array_equal(pcp.predict(X), pred)
from pystruct.models import BinaryClf
from pystruct.learners import NSlackSSVM, OneSlackSSVM, SubgradientSSVM

# Binary digit classification: odd vs even digits.
digits = load_digits()
X, y = digits.data, digits.target
# make a binary task from odd vs even numbers, coded as +1 and -1
y = 2 * (y % 2) - 1
# scale the features in place by their maximum
X /= X.max()

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# one shared model, three learners with the same C
pbl = BinaryClf()
n_slack_svm = NSlackSSVM(pbl, C=10, batch_size=-1)
one_slack_svm = OneSlackSSVM(pbl, C=10, tol=0.1)
subgradient_svm = SubgradientSSVM(pbl, C=10, learning_rate=0.1,
                                  max_iter=100, batch_size=10)

# we add a constant 1 feature for the bias
X_train_bias = np.hstack([X_train, np.ones((len(X_train), 1))])
X_test_bias = np.hstack([X_test, np.ones((len(X_test), 1))])

# n-slack cutting plane ssvm; `start` records the time before fitting
start = time()
n_slack_svm.fit(X_train_bias, y_train)
import numpy as np
import loader
import util
from sklearn import preprocessing

# Location and name of the feature set to load.
directory = "/Users/thijs/dev/boilerplate/src/main/resources/dataset/"
featureset = "features10"

print("Load files")
features, labels = loader.loadBinary(
    '{0}.csv'.format(featureset), 'labels.csv', directory)
# Disabled: shuffle the loaded data
# print("Shuffle results")
# features, labels = util.shuffle(features, labels)
print("Loaded")
# Disabled: inspect labels / scale the features
# print(labels)
# features = preprocessing.scale(features)

from pystruct.models import BinaryClf
from pystruct.learners import (NSlackSSVM, OneSlackSSVM, SubgradientSSVM,
                               FrankWolfeSSVM)

# Fit a Frank-Wolfe SSVM on the loaded data and score it on the same data.
clf = FrankWolfeSSVM(BinaryClf(), verbose=True)
# print(clf)
clf.fit(features, labels)
trscore = clf.score(features, labels)
# print("Training score: {0}".format(trscore))
# "Klaar" is Dutch for "done"
print("Klaar")
def test_break_ties():
    """Batch inference on a zero-score sample must predict +1."""
    pbl = BinaryClf(n_features=2)
    w = np.array([1., 1.])
    # dot products with w are -2, 0 and 2; the middle sample is the tie
    samples = [[-1., -1.], [-1., 1.], [1., 1.]]
    X = np.array(samples)
    expected = np.array([-1, 1, 1])
    assert_array_equal(pbl.batch_inference(X, w), expected)