def test_one_slack_constraint_caching():
    # Fit a one-slack SSVM with an inference cache on the multinomial blocks
    # data, then pin the number of cached constraints per sample so that a
    # change in how the cache is built gets noticed.
    X, Y = generate_blocks_multinomial(n_samples=10, noise=0.5, seed=0,
                                       size_x=9)
    n_labels = len(np.unique(Y))
    # take the first entry get_installed returns for the two exact solvers
    exact_inference = get_installed(
        [('ad3', {'branch_and_bound': True}), "lp"])[0]
    model = GridCRF(n_states=n_labels, inference_method=exact_inference)
    learner = OneSlackSSVM(model=model, max_iter=150, C=1,
                           check_constraints=True, break_on_bad=True,
                           inference_cache=50, inactive_window=0)
    learner.fit(X, Y)
    predicted = learner.predict(X)
    assert_array_equal(Y, predicted)
    # one cache per training sample
    assert_equal(len(learner.inference_cache_), len(X))

    cache_sizes = [len(entry) for entry in learner.inference_cache_]
    # (size for sample 0, largest, smallest); the pinned counts differ
    # between the "lp" solver and the other one
    if exact_inference == "lp":
        expected = (18, 18, 18)
    else:
        expected = (13, 20, 11)
    assert_equal(len(learner.inference_cache_[0]), expected[0])
    assert_equal(np.max(cache_sizes), expected[1])
    assert_equal(np.min(cache_sizes), expected[2])
def syntetic_test():
    """Evaluate the model for several training-set sizes on 18 datasets.

    For every dataset 1..18 and every number of labelled training samples
    in (2, 4, 10, 25, 100), a fresh model is fitted on the first ``nfull``
    samples and its error on the samples from index 401 onwards is stored.
    The 18 x 5 error table is written to
    ``results/syntetic/full_labeled.txt``.
    """
    labeled_counts = np.array([2, 4, 10, 25, 100])
    train_size = 400
    results = np.zeros((18, 5))

    for dataset in range(1, 19):
        X, Y = load_syntetic(dataset)
        row = dataset - 1
        for col, nfull in enumerate(labeled_counts):
            crf = EdgeCRF(n_states=10, n_features=10, n_edge_features=2,
                          inference_method='qpbo')
            clf = OneSlackSSVM(crf, max_iter=10000, C=0.01, verbose=0,
                               tol=0.1, n_jobs=4, inference_cache=100)
            x_train, y_train = X[:nfull], Y[:nfull]
            # NOTE(review): the slice starts at train_size + 1, so index
            # 400 is in neither split -- confirm this is intended.
            x_test = X[(train_size + 1):]
            y_test = Y[(train_size + 1):]
            try:
                clf.fit(x_train, y_train)
                y_pred = clf.predict(x_test)
                results[row, col] = compute_error(y_test, y_pred)
                print('dataset=%d, nfull=%d, error=%f'
                      % (dataset, nfull, results[row, col]))
            except ValueError:
                # a failed run leaves the initial 0.0 in the table
                print('dataset=%d, nfull=%d: Failed' % (dataset, nfull))

    np.savetxt('results/syntetic/full_labeled.txt', results)
def test_standard_svm_blobs_2d_class_weight():
    # No edges: the GraphCRF reduces to a crammer-singer svm.  Check that
    # inverse-frequency class weights improve the macro F1 on imbalanced
    # blobs.
    X, Y = make_blobs(n_samples=210, centers=3, random_state=1,
                      cluster_std=3, shuffle=False)
    # constant-1 feature appended by hand
    X = np.hstack([X, np.ones((X.shape[0], 1))])
    # dropping the tail of the unshuffled data makes the classes imbalanced
    X, Y = X[:170], Y[:170]

    # every sample becomes a single-node graph with an empty edge list;
    # the builtin int replaces the np.int alias removed from NumPy
    X_graphs = [(x[np.newaxis, :], np.empty((0, 2), dtype=int))
                for x in X]

    pbl = GraphCRF(n_features=3, n_states=3, inference_method='unary')
    svm = OneSlackSSVM(pbl, check_constraints=False, C=1000)
    svm.fit(X_graphs, Y[:, np.newaxis])

    # inverse class frequencies, rescaled so the weights sum to n_classes
    weights = 1. / np.bincount(Y)
    weights *= len(weights) / np.sum(weights)
    pbl_class_weight = GraphCRF(n_features=3, n_states=3,
                                class_weight=weights,
                                inference_method='unary')
    svm_class_weight = OneSlackSSVM(pbl_class_weight, C=10,
                                    check_constraints=False,
                                    break_on_bad=False)
    svm_class_weight.fit(X_graphs, Y[:, np.newaxis])

    # the labels are multiclass, so f1_score needs an explicit average
    f1_weighted = f1_score(Y, np.hstack(svm_class_weight.predict(X_graphs)),
                           average='macro')
    f1_plain = f1_score(Y, np.hstack(svm.predict(X_graphs)),
                        average='macro')
    assert_greater(f1_weighted, f1_plain)
def test_class_weights_rescale_C():
    # Compare the weight vector learned by MultiClassClf with class weights
    # and rescale_C=True against the coefficients of LinearSVC's
    # crammer-singer mode with class_weight='auto'.
    from sklearn.svm import LinearSVC

    data, labels = make_blobs(n_samples=210, centers=3, random_state=1,
                              cluster_std=3, shuffle=False)
    # constant-1 feature appended by hand, then keep the first 170 samples
    data = np.hstack([data, np.ones((data.shape[0], 1))])
    data, labels = data[:170], labels[:170]

    # inverse class frequencies, rescaled so the weights sum to n_classes
    class_weights = 1. / np.bincount(labels)
    class_weights *= len(class_weights) / np.sum(class_weights)

    weighted_model = MultiClassClf(n_features=3, n_classes=3,
                                   class_weight=class_weights,
                                   rescale_C=True)
    weighted_svm = OneSlackSSVM(weighted_model, C=10, tol=1e-5)
    weighted_svm.fit(data, labels)

    try:
        reference = LinearSVC(multi_class='crammer_singer',
                              fit_intercept=False, class_weight='auto',
                              C=10)
        reference.fit(data, labels)
        assert_array_almost_equal(weighted_svm.w, reference.coef_.ravel(), 3)
    except TypeError:
        # deliberately tolerated: per the original note, old sklearn
        # releases do not accept class_weight in LinearSVC
        pass
def test_constraint_removal():
    # Train the same binary model with and without removal of inactive
    # constraints and compare predictions and constraint counts.
    digits = load_digits()
    features = digits.data / 16.
    # odd digits become +1, even digits become -1
    targets = 2 * (digits.target % 2) - 1

    pbl = BinarySVMModel(n_features=features.shape[1])
    shared = dict(model=pbl, max_iter=500, verbose=1, C=10, tol=0.01)

    keep_all = OneSlackSSVM(inactive_window=0, **shared)
    keep_all.fit(features, targets)

    pruning = OneSlackSSVM(inactive_threshold=1e-8, **shared)
    pruning.fit(features, targets)

    # the two learners may disagree on fewer than 2% of the samples
    disagreement = np.mean(
        pruning.predict(features) != keep_all.predict(features))
    assert_less(disagreement, 0.02)

    # without removal: exactly one constraint more than recorded objective
    # values
    assert_equal(len(keep_all.objective_curve_) + 1,
                 len(keep_all.constraints_))
    # with removal: fewer constraints than recorded objective values
    assert_less(len(pruning.constraints_), len(pruning.objective_curve_))
def test_binary_blocks_one_slack_graph():
    # Cutting plane ssvm on the easy binary blocks data, with the graph of
    # every example built explicitly so the examples can differ in shape.
    for inference_method in ["dai", "lp"]:
        print("testing %s" % inference_method)
        X, Y = toy.generate_blocks(n_samples=3)
        crf = GraphCRF(inference_method=inference_method)
        clf = OneSlackSSVM(problem=crf, max_iter=100, C=100, verbose=100,
                           check_constraints=True, break_on_bad=True,
                           n_jobs=1)
        x1, x2, x3 = X
        y1, y2, y3 = Y
        n_states = len(np.unique(Y))
        # trim the first two examples along different axes so the three
        # grids no longer share one shape
        x1, y1 = x1[:, :-1], y1[:, :-1]
        x2, y2 = x2[:-1], y2[:-1]

        # generate graphs
        X_ = [x1, x2, x3]
        G = [make_grid_edges(x) for x in X_]

        # reshape / flatten x and y
        X_ = [x.reshape(-1, n_states) for x in X_]
        Y = [y.ravel() for y in [y1, y2, y3]]

        # Materialise the pairs: X is passed to both fit() and predict(),
        # and a bare zip() iterator would be exhausted after the first use
        # on Python 3, leaving nothing to predict and nothing to compare.
        X = list(zip(X_, G))

        clf.fit(X, Y)
        Y_pred = clf.predict(X)
        for y, y_pred in zip(Y, Y_pred):
            assert_array_equal(y, y_pred)
def msrc():
    """Train on the MSRC 'train' split, save the model, report on 'test'.

    The weight vector is written to ``models/msrc/msrc_full.csv`` and the
    fitted learner is pickled to ``models/msrc/msrc_full.pickle``.  Test
    error, test score, the norm of the weight vector and the training time
    are printed.

    :return: the fitted ``OneSlackSSVM`` instance.
    """
    models_basedir = 'models/msrc/'
    crf = EdgeCRF(n_states=24, n_features=2028, n_edge_features=4,
                  inference_method='gco')
    clf = OneSlackSSVM(crf, max_iter=10000, C=0.01, verbose=2,
                       tol=0.1, n_jobs=4, inference_cache=100)

    X, Y = load_msrc('train')
    Y = remove_areas(Y)

    start = time()
    clf.fit(X, Y)
    stop = time()

    np.savetxt(models_basedir + 'msrc_full.csv', clf.w)
    # pickle produces bytes, so the file has to be opened in binary mode
    with open(models_basedir + 'msrc_full' + '.pickle', 'wb') as f:
        pickle.dump(clf, f)

    X, Y = load_msrc('test')
    Y = remove_areas(Y)
    Y_pred = clf.predict(X)

    print('Error on test set: %f' % compute_error(Y, Y_pred))
    print('Score on test set: %f' % clf.score(X, Y))
    print('Norm of weight vector: |w|=%f' % np.linalg.norm(clf.w))
    print('Elapsed time: %f s' % (stop - start))

    return clf
def syntetic():
    """Train on 100 samples of synthetic dataset 1 and report the results.

    The weight vector is written to ``models/syntetic/syntetic_full.csv``
    and the fitted learner is pickled next to it.  Test error, test and
    train scores, the norm of the weight vector and the training time are
    printed.

    :return: the fitted ``OneSlackSSVM`` instance.
    """
    models_basedir = 'models/syntetic/'
    crf = EdgeCRF(n_states=10, n_features=10, n_edge_features=2,
                  inference_method='gco')
    clf = OneSlackSSVM(crf, max_iter=10000, C=0.01, verbose=2,
                       tol=0.1, n_jobs=4, inference_cache=100)

    X, Y = load_syntetic(1)
    x_train, x_test, y_train, y_test = train_test_split(
        X, Y, train_size=100, random_state=179)

    start = time()
    clf.fit(x_train, y_train)
    stop = time()

    np.savetxt(models_basedir + 'syntetic_full.csv', clf.w)
    # pickle data is binary, so open the file in binary mode
    with open(models_basedir + 'syntetic_full' + '.pickle', 'wb') as f:
        cPickle.dump(clf, f)

    y_pred = clf.predict(x_test)

    # single-argument print(...) calls work as a statement on Python 2 and
    # as a function on Python 3
    print('Error on test set: %f' % compute_error(y_test, y_pred))
    print('Score on test set: %f' % clf.score(x_test, y_test))
    print('Score on train set: %f' % clf.score(x_train, y_train))
    print('Norm of weight vector: |w|=%f' % np.linalg.norm(clf.w))
    print('Elapsed time: %f s' % (stop - start))

    return clf
def test(nfiles):
    """Train a ChainCRF on ``nfiles`` MIDI files and evaluate on ``5 * nfiles``.

    Training files are the first ``nfiles`` entries of the directory
    listing, test files the last ``5 * nfiles`` entries.  ``per_file`` is
    called with each file and the lists it should fill.  Read, train and
    test timings, the true and predicted label sequences and the four
    confusion counts are printed.

    :param nfiles: number of training files.
    """
    # time.clock() no longer exists on Python >= 3.8; fall back to
    # time.time on interpreters that lack perf_counter.
    timer = getattr(time, 'perf_counter', time.time)

    X = []
    Y = []
    X_tst = []
    Y_tst = []
    ntrain = nfiles
    ntest = 5 * nfiles

    print("Training/testing with %d/%d files." % (ntrain, ntest))
    start = timer()
    filename = '../maps/MAPS_AkPnCGdD_2/AkPnCGdD/MUS'
    files = dirs.get_files_with_extension(filename, '.mid')
    train_files = files[:ntrain]
    print("\t" + str(train_files))
    test_files = files[-ntest:]

    # Explicit loops instead of map(): map() is lazy on Python 3, so the
    # calls (made only for their side effects) would never run.
    for path in train_files:
        per_file(path, X, Y)
    for path in test_files:
        per_file(path, X_tst, Y_tst)
    end = timer()
    print("\tRead time: %f" % (end - start))
    print("\tnWindows train: " + str([x.shape[0] for x in X]))

    start = timer()
    crf = ChainCRF(n_states=2)
    clf = OneSlackSSVM(model=crf, C=100, n_jobs=-1, inference_cache=100,
                       tol=.1)
    clf.fit(np.array(X), np.array(Y))
    end = timer()
    print("\tTrain time: %f" % (end - start))

    start = timer()
    Y_pred = clf.predict(X_tst)
    comp = []
    for y_true_seq, y_pred_seq in zip(Y_tst, Y_pred):
        comp.extend(zip(y_true_seq, y_pred_seq))
        # true labels on one line, predictions on the next, then a blank
        # line between sequences
        print(' '.join(str(label) for label in y_true_seq))
        print(' '.join(str(label) for label in y_pred_seq))
        print('')

    print("\tTrue positives: %d" % comp.count((1, 1)))
    print("\tTrue negatives: %d" % comp.count((0, 0)))
    print("\tFalse positives: %d" % comp.count((0, 1)))
    print("\tFalse negatives: %d" % comp.count((1, 0)))
    end = timer()
    print("\tTest time: %f" % (end - start))
def main():
    """Train two edge-feature CRFs on the snakes data and plot predictions.

    The first model is fitted on the first output of ``prepare_data``
    (direction features), the second on its second output (edge
    features).  For both, test accuracy and the confusion matrix are
    printed; finally the first test image, its ground truth and both
    predictions are shown in a 2x2 figure.
    """
    print("Please be patient. Will take 5-20 minutes.")
    snakes = load_snakes()
    X_train, Y_train = snakes['X_train'], snakes['Y_train']

    X_train = [one_hot_colors(x) for x in X_train]
    Y_train_flat = [y_.ravel() for y_ in Y_train]

    X_train_directions, X_train_edge_features = prepare_data(X_train)

    # first, train on X with directions only:
    crf = EdgeFeatureGraphCRF(inference_method='qpbo')
    ssvm = OneSlackSSVM(crf, inference_cache=50, C=.1, tol=.1,
                        switch_to='ad3', n_jobs=-1)
    ssvm.fit(X_train_directions, Y_train_flat)

    # Evaluate using confusion matrix.
    X_test, Y_test = snakes['X_test'], snakes['Y_test']
    X_test = [one_hot_colors(x) for x in X_test]
    Y_test_flat = [y_.ravel() for y_ in Y_test]
    X_test_directions, X_test_edge_features = prepare_data(X_test)
    y_true = np.hstack(Y_test_flat)

    Y_pred = ssvm.predict(X_test_directions)
    print("Results using only directional features for edges")
    print("Test accuracy: %.3f"
          % accuracy_score(y_true, np.hstack(Y_pred)))
    print(confusion_matrix(y_true, np.hstack(Y_pred)))

    # now, use more informative edge features:
    crf = EdgeFeatureGraphCRF(inference_method='qpbo')
    ssvm = OneSlackSSVM(crf, inference_cache=50, C=.1, tol=.1,
                        switch_to='ad3', n_jobs=-1)
    ssvm.fit(X_train_edge_features, Y_train_flat)
    Y_pred2 = ssvm.predict(X_test_edge_features)
    print("Results using also input features for edges")
    print("Test accuracy: %.3f"
          % accuracy_score(y_true, np.hstack(Y_pred2)))
    print(confusion_matrix(y_true, np.hstack(Y_pred2)))

    # plot stuff
    fig, axes = plt.subplots(2, 2)
    axes[0, 0].imshow(snakes['X_test'][0], interpolation='nearest')
    axes[0, 0].set_title('Input')
    # builtin int: the np.int alias no longer exists in current NumPy
    y = Y_test[0].astype(int)
    bg = 2 * (y != 0)  # enhance contrast
    axes[0, 1].matshow(y + bg, cmap=plt.cm.Greys)
    axes[0, 1].set_title("Ground Truth")
    axes[1, 0].matshow(Y_pred[0].reshape(y.shape) + bg, cmap=plt.cm.Greys)
    axes[1, 0].set_title("Prediction w/o edge features")
    axes[1, 1].matshow(Y_pred2[0].reshape(y.shape) + bg, cmap=plt.cm.Greys)
    axes[1, 1].set_title("Prediction with edge features")
    for a in axes.ravel():
        a.set_xticks(())
        a.set_yticks(())
    plt.show()
    # The leftover IPython Tracer breakpoint that used to follow
    # plt.show() was removed so the script terminates on its own.
def test_one_slack_attractive_potentials():
    # An SSVM with a negativity constraint on weight 5 must still fit the
    # block dataset perfectly, and that weight must end up negative.
    X, Y = generate_blocks(n_samples=10)
    grid_model = GridCRF(inference_method=inference_method)
    settings = dict(max_iter=200, C=1, check_constraints=True,
                    negativity_constraint=[5], inference_cache=50)
    submodular_clf = OneSlackSSVM(model=grid_model, **settings)
    submodular_clf.fit(X, Y)

    predictions = submodular_clf.predict(X)
    assert_array_equal(Y, predictions)
    assert_true(submodular_clf.w[5] < 0)
def test_multinomial_blocks_one_slack():
    # The cutting plane ssvm must reproduce the training labels of the
    # easy multinomial blocks dataset.
    X, Y = generate_blocks_multinomial(n_samples=10, noise=0.5, seed=0)
    n_labels = len(np.unique(Y))
    grid_model = GridCRF(n_states=n_labels,
                         inference_method=inference_method)
    learner = OneSlackSSVM(model=grid_model, max_iter=150, C=1,
                           check_constraints=True, break_on_bad=True,
                           tol=.1, inference_cache=50)
    learner.fit(X, Y)
    predictions = learner.predict(X)
    assert_array_equal(Y, predictions)
def test_one_slack_attractive_potentials():
    # An SSVM constrained on weight 5 must still fit the block dataset
    # perfectly.
    X, Y = toy.generate_blocks(n_samples=10)
    grid_model = GridCRF()
    settings = dict(max_iter=200, C=100, verbose=1, check_constraints=True,
                    positive_constraint=[5], n_jobs=-1)
    submodular_clf = OneSlackSSVM(model=grid_model, **settings)
    submodular_clf.fit(X, Y)

    predictions = submodular_clf.predict(X)
    assert_array_equal(Y, predictions)
    # the argument is called positive_constraint, yet the constrained
    # weight is expected to be negative (sign convention left unexplained
    # by the original author)
    assert_true(submodular_clf.w[5] < 0)
def test_multinomial_blocks_one_slack():
    # The cutting plane ssvm must reproduce the training labels of the
    # easy multinomial blocks dataset for every listed inference method.
    X, Y = toy.generate_blocks_multinomial(n_samples=10, noise=0.3, seed=0)
    n_labels = len(np.unique(Y))
    settings = dict(max_iter=50, C=100, verbose=100,
                    check_constraints=True, break_on_bad=True)
    for inference_method in ('lp',):
        grid_model = GridCRF(n_states=n_labels,
                             inference_method=inference_method)
        learner = OneSlackSSVM(problem=grid_model, **settings)
        learner.fit(X, Y)
        predictions = learner.predict(X)
        assert_array_equal(Y, predictions)
def test_multilabel_yeast_independent():
    # Fit a multi-label problem without any edges on the "yeast" data.
    # This only checks that fitting runs through; nothing is asserted yet.
    yeast = fetch_mldata("yeast")
    X = yeast.data
    # builtin int: the np.int alias no longer exists in current NumPy
    y = yeast.target.toarray().T.astype(int)
    # no edges for the moment
    edges = np.zeros((0, 2), dtype=int)
    pbl = MultiLabelProblem(n_features=X.shape[1], n_labels=y.shape[1],
                            edges=edges)
    ssvm = OneSlackSSVM(pbl, verbose=10)
    ssvm.fit(X, y)
    # The leftover IPython Tracer breakpoint was removed: it would stop an
    # unattended test run at an interactive prompt.
def test_blobs_2d_one_slack():
    # Binary SVM on two gaussian blobs: train on the first 40 samples and
    # expect perfect predictions on the remaining 40.
    points, labels = make_blobs(n_samples=80, centers=2, random_state=1)
    labels = 2 * labels - 1  # map {0, 1} to {-1, +1}
    # the constant-1 feature has to be appended by hand
    bias_column = np.ones((points.shape[0], 1))
    points = np.hstack([points, bias_column])

    X_train, X_test = points[:40], points[40:]
    Y_train, Y_test = labels[:40], labels[40:]

    model = BinarySVMModel(n_features=3)
    learner = OneSlackSSVM(model, verbose=30, C=1000)
    learner.fit(X_train, Y_train)
    assert_array_equal(Y_test, np.hstack(learner.predict(X_test)))
def test_blobs_2d_one_slack():
    # Multi-class SVM on three gaussian blobs: train on the first 40
    # samples and expect perfect predictions on the remaining 40.
    points, labels = make_blobs(n_samples=80, centers=3, random_state=42)
    # the constant-1 feature has to be appended by hand
    bias_column = np.ones((points.shape[0], 1))
    points = np.hstack([points, bias_column])

    X_train, X_test = points[:40], points[40:]
    Y_train, Y_test = labels[:40], labels[40:]

    model = MultiClassClf(n_features=3, n_classes=3)
    learner = OneSlackSSVM(model, check_constraints=True, C=1000)
    learner.fit(X_train, Y_train)
    assert_array_equal(Y_test, np.hstack(learner.predict(X_test)))
def test_standard_svm_blobs_2d():
    # No edges: the GraphCRF reduces to a crammer-singer svm.  Train on the
    # first 40 blob samples and expect perfect predictions on the rest.
    X, Y = make_blobs(n_samples=80, centers=3, random_state=42)
    # we have to add a constant 1 feature by hand :-/
    X = np.hstack([X, np.ones((X.shape[0], 1))])
    X_train, X_test, Y_train, Y_test = X[:40], X[40:], Y[:40], Y[40:]

    # Every sample becomes a single-node graph with an empty edge list.
    # The builtin int replaces the np.int alias removed from NumPy.
    no_edges_dtype = int
    X_train_graphs = [(x[np.newaxis, :],
                       np.empty((0, 2), dtype=no_edges_dtype))
                      for x in X_train]
    X_test_graphs = [(x[np.newaxis, :],
                      np.empty((0, 2), dtype=no_edges_dtype))
                     for x in X_test]

    pbl = GraphCRF(n_features=3, n_states=3)
    svm = OneSlackSSVM(pbl, verbose=10, check_constraints=True, C=1000)

    svm.fit(X_train_graphs, Y_train[:, np.newaxis])
    assert_array_equal(Y_test, np.hstack(svm.predict(X_test_graphs)))
def test_svm_as_crf_pickling():
    # Train an edge-free GraphCRF on iris while a SaveLogger stores the
    # learner, then check that both the live and the reloaded learner
    # score above 0.97 on the held-out split.
    import os

    iris = load_iris()
    X, y = iris.data, iris.target

    # single-node graphs with an empty edge list; the builtin int replaces
    # the np.int alias removed from NumPy
    X_ = [(np.atleast_2d(x), np.empty((0, 2), dtype=int)) for x in X]
    Y = y.reshape(-1, 1)

    X_train, X_test, y_train, y_test = train_test_split(X_, Y,
                                                        random_state=1)
    # mkstemp returns an OPEN descriptor; close it right away so it is not
    # leaked, and remove the temporary file once the test is done
    fd, file_name = mkstemp()
    os.close(fd)
    try:
        pbl = GraphCRF(n_features=4, n_states=3, inference_method="unary")
        logger = SaveLogger(file_name)
        svm = OneSlackSSVM(pbl, check_constraints=True, C=1, n_jobs=1,
                           logger=logger)
        svm.fit(X_train, y_train)

        assert_less(0.97, svm.score(X_test, y_test))
        assert_less(0.97, logger.load().score(X_test, y_test))
    finally:
        os.remove(file_name)
def synteticTrain(peer):
    """Fit a ``OneSlackSSVM`` built around ``peer`` and save its weights.

    The weight vector is written as a single space-delimited row to
    ``syntetic_full.csv`` under the directory configured as
    ``models.basedir``; the training time is reported through
    ``peer.log``.

    :param peer: object handed to ``OneSlackSSVM`` as the model; it must
        also provide ``config.get`` and ``log``.
    :return: the fitted ``OneSlackSSVM`` instance.
    """
    learner = OneSlackSSVM(peer, max_iter=10000, C=0.01, verbose=2,
                           tol=0.1, n_jobs=4, inference_cache=100)

    t_begin = time()
    learner.fit()
    t_end = time()

    out_dir = peer.config.get("models.basedir")
    # indexing with None adds a leading axis, so the weights form one row
    weight_row = learner.w[None]
    np.savetxt(out_dir + 'syntetic_full.csv', weight_row, delimiter=' ')

    peer.log('Elapsed time: %f s' % (t_end - t_begin))
    return learner
def test_switch_to_ad3():
    # Check that a learner started with qpbo inference ends up using ad3
    # when switch_to is given, and that its final objective is higher.
    if not (get_installed(["qpbo"]) and get_installed(["ad3"])):
        # nothing to test unless both solvers are available
        return

    X, Y = generate_blocks_multinomial(n_samples=5, noise=1.5, seed=0)
    grid_model = GridCRF(n_states=3, inference_method="qpbo")

    plain = OneSlackSSVM(grid_model, inference_cache=50, max_iter=10000)
    switching = OneSlackSSVM(grid_model, inference_cache=50,
                             max_iter=10000, switch_to=("ad3"))
    plain.fit(X, Y)
    switching.fit(X, Y)

    assert_equal(switching.model.inference_method, "ad3")
    # the last objective value with the switch must exceed the one without
    assert_greater(switching.objective_curve_[-1],
                   plain.objective_curve_[-1])
def msrc_test():
    """Train on MSRC subsets of increasing size and record test quality.

    For each size in (20, 40, 80, 160, 276) a mask selects the training
    images (read from ``trainMaskX<n>.txt``, or all ones for 276), a fresh
    model is fitted, its weights and pickle are stored under
    ``models/msrc/`` and ``1 - error`` on the test split is collected.
    The collected values are written to ``results/msrc/msrc_full.txt``.
    """
    basedir = '../data/msrc/trainmasks/'
    models_basedir = 'models/msrc/'
    quality = []

    Xtest, Ytest = load_msrc('test')
    Ytest = remove_areas(Ytest)
    Xtrain, Ytrain = load_msrc('train')
    Ytrain = remove_areas(Ytrain)

    for n_train in [20, 40, 80, 160, 276]:
        crf = EdgeCRF(n_states=24, n_features=2028, n_edge_features=4,
                      inference_method='gco')
        clf = OneSlackSSVM(crf, max_iter=1000, C=0.01, verbose=0,
                           tol=0.1, n_jobs=4, inference_cache=100)

        # builtin bool: the np.bool alias no longer exists in current NumPy
        if n_train != 276:
            train_mask = np.genfromtxt(basedir + 'trainMaskX%d.txt' % n_train)
            train_mask = train_mask[:277].astype(bool)
        else:
            train_mask = np.ones(276).astype(bool)

        # keep only the samples whose mask entry is set
        selected = [(x, y)
                    for s, x, y in zip(train_mask, Xtrain, Ytrain) if s]
        curX = [x for x, _ in selected]
        curY = [y for _, y in selected]

        start = time()
        clf.fit(curX, curY)
        stop = time()

        np.savetxt(models_basedir + 'test_model_%d.csv' % n_train, clf.w)
        # pickle produces bytes, so the file has to be opened in binary mode
        with open(models_basedir + 'test_model_%d' % n_train + '.pickle',
                  'wb') as f:
            pickle.dump(clf, f)

        Ypred = clf.predict(Xtest)

        q = 1 - compute_error(Ytest, Ypred)

        print('n_train=%d, quality=%f, time=%f' % (n_train, q, stop - start))
        quality.append(q)

    np.savetxt('results/msrc/msrc_full.txt', quality)
def conditional_random_fields(X, y):
    """Fit an edge-free GraphCRF with OneSlackSSVM and report its quality.

    Every row of ``X`` becomes a single-node graph with an empty edge
    list.  After a train/test split the accuracy and the classification
    report are printed and the confusion matrix is plotted.

    :param X: iterable of feature vectors.
    :param y: array of labels; it is reshaped to one column.
    """
    # builtin int: the np.int alias no longer exists in current NumPy
    X_ = [(np.atleast_2d(x), np.empty((0, 2), dtype=int)) for x in X]
    Y = y.reshape(-1, 1)

    X_train, X_test, y_train, y_test = train_test_split(X_, Y)

    pbl = GraphCRF()
    svm = OneSlackSSVM(pbl)

    svm.fit(X_train, y_train)
    y_pred = np.vstack(svm.predict(X_test))
    print("Score with pystruct crf svm: %f "
          % (np.mean(y_pred == y_test)))
    # print(...) with a single argument is valid on Python 2 and Python 3
    print(classification_report(y_test, y_pred))
    plot_confusion_matrix(y_test, y_pred)
def test_class_weights():
    # Inverse-frequency class weights must improve the macro F1 of the
    # multi-class SVM on imbalanced blobs.
    X, Y = make_blobs(n_samples=210, centers=3, random_state=1,
                      cluster_std=3, shuffle=False)
    # constant-1 feature appended by hand
    X = np.hstack([X, np.ones((X.shape[0], 1))])
    # dropping the tail of the unshuffled data makes the classes imbalanced
    X, Y = X[:170], Y[:170]

    pbl = MultiClassClf(n_features=3, n_classes=3)
    svm = OneSlackSSVM(pbl, C=10)
    svm.fit(X, Y)

    # inverse class frequencies, rescaled so the weights sum to n_classes
    weights = 1. / np.bincount(Y)
    weights *= len(weights) / np.sum(weights)
    pbl_class_weight = MultiClassClf(n_features=3, n_classes=3,
                                     class_weight=weights)
    svm_class_weight = OneSlackSSVM(pbl_class_weight, C=10)
    svm_class_weight.fit(X, Y)

    # the labels are multiclass, so f1_score needs an explicit average
    f1_weighted = f1_score(Y, svm_class_weight.predict(X), average='macro')
    f1_plain = f1_score(Y, svm.predict(X), average='macro')
    assert_greater(f1_weighted, f1_plain)
def test_equal_class_weights():
    # A class weight of one for every class must give the same predictions
    # and (almost) the same weight vector as no class weights at all.
    points, labels = make_blobs(n_samples=80, centers=3, random_state=42)
    # constant-1 feature appended by hand
    points = np.hstack([points, np.ones((points.shape[0], 1))])
    X_train, X_test = points[:40], points[40:]
    Y_train = labels[:40]

    unweighted = OneSlackSSVM(MultiClassClf(n_features=3, n_classes=3),
                              C=10)
    unweighted.fit(X_train, Y_train)
    pred_unweighted = unweighted.predict(X_test)

    uniform_model = MultiClassClf(n_features=3, n_classes=3,
                                  class_weight=np.ones(3))
    uniform = OneSlackSSVM(uniform_model, C=10)
    uniform.fit(X_train, Y_train)
    pred_uniform = uniform.predict(X_test)

    assert_array_equal(pred_unweighted, pred_uniform)
    assert_array_almost_equal(unweighted.w, uniform.w)
def test_one_slack_constraint_caching():
    # Fit a one-slack SSVM with an inference cache on the multinomial
    # blocks data and pin the number of cached constraints per sample.
    X, Y = toy.generate_blocks_multinomial(n_samples=10, noise=0.3, seed=0)
    n_labels = len(np.unique(Y))
    grid_model = GridCRF(n_states=n_labels, inference_method='lp')
    learner = OneSlackSSVM(problem=grid_model, max_iter=50, C=100,
                           verbose=100, check_constraints=True,
                           break_on_bad=True, inference_cache=50)
    learner.fit(X, Y)
    predictions = learner.predict(X)
    assert_array_equal(Y, predictions)

    # one cache per training sample
    assert_equal(len(learner.inference_cache_), len(X))
    # the first sample is expected to hold 9 cached constraints
    assert_equal(len(learner.inference_cache_[0]), 9)

    # guard against changes in how the cache is constructed
    cache_sizes = [len(entry) for entry in learner.inference_cache_]
    assert_equal(np.max(cache_sizes), 10)
    assert_equal(np.min(cache_sizes), 8)
def test_one_slack_constraint_caching():
    # Fit a one-slack SSVM with an inference cache on the multinomial
    # blocks data and pin the number of cached constraints per sample.
    X, Y = toy.generate_blocks_multinomial(n_samples=10, noise=0.5, seed=0,
                                           size_x=9)
    n_labels = len(np.unique(Y))
    grid_model = GridCRF(n_states=n_labels)
    learner = OneSlackSSVM(model=grid_model, max_iter=150, C=1,
                           check_constraints=True, break_on_bad=True,
                           inference_cache=50, inactive_window=0)
    learner.fit(X, Y)
    predictions = learner.predict(X)
    assert_array_equal(Y, predictions)

    # one cache per training sample
    assert_equal(len(learner.inference_cache_), len(X))
    # the first sample is expected to hold 21 cached constraints
    assert_equal(len(learner.inference_cache_[0]), 21)

    # guard against changes in how the cache is constructed: every sample
    # is expected to hold exactly 21 entries
    cache_sizes = [len(entry) for entry in learner.inference_cache_]
    assert_equal(np.max(cache_sizes), 21)
    assert_equal(np.min(cache_sizes), 21)
def test_class_weights():
    # Inverse-frequency class weights must improve the macro F1 of the
    # crammer-singer model on imbalanced blobs.
    X, Y = make_blobs(n_samples=210, centers=3, random_state=1,
                      cluster_std=3, shuffle=False)
    # constant-1 feature appended by hand
    X = np.hstack([X, np.ones((X.shape[0], 1))])
    # dropping the tail of the unshuffled data makes the classes imbalanced
    X, Y = X[:170], Y[:170]

    pbl = CrammerSingerSVMModel(n_features=3, n_classes=3)
    svm = OneSlackSSVM(pbl, verbose=10, C=10)
    svm.fit(X, Y)

    # inverse class frequencies, rescaled so the weights sum to n_classes
    weights = 1. / np.bincount(Y)
    weights *= len(weights) / np.sum(weights)
    pbl_class_weight = CrammerSingerSVMModel(n_features=3, n_classes=3,
                                             class_weight=weights)
    svm_class_weight = OneSlackSSVM(pbl_class_weight, verbose=10, C=10)
    svm_class_weight.fit(X, Y)

    # the labels are multiclass, so f1_score needs an explicit average
    f1_weighted = f1_score(Y, svm_class_weight.predict(X), average='macro')
    f1_plain = f1_score(Y, svm.predict(X), average='macro')
    assert_greater(f1_weighted, f1_plain)
def test_svm_as_crf_pickling():
    # Train an edge-free GraphCRF on iris while a SaveLogger stores the
    # learner, then check that both the live and the reloaded learner
    # score above .97 on the held-out split.
    import os

    iris = load_iris()
    X, y = iris.data, iris.target

    # single-node graphs with an empty edge list; the builtin int replaces
    # the np.int alias removed from NumPy
    X_ = [(np.atleast_2d(x), np.empty((0, 2), dtype=int)) for x in X]
    Y = y.reshape(-1, 1)

    X_train, X_test, y_train, y_test = train_test_split(X_, Y,
                                                        random_state=1)
    # mkstemp returns an OPEN descriptor; close it right away so it is not
    # leaked, and remove the temporary file once the test is done
    fd, file_name = mkstemp()
    os.close(fd)
    try:
        pbl = GraphCRF(n_features=4, n_states=3, inference_method='unary')
        logger = SaveLogger(file_name)
        svm = OneSlackSSVM(pbl, check_constraints=True, C=1, n_jobs=1,
                           logger=logger)
        svm.fit(X_train, y_train)

        assert_less(.97, svm.score(X_test, y_test))
        assert_less(.97, logger.load().score(X_test, y_test))
    finally:
        os.remove(file_name)
def test_one_slack_repellent_potentials():
    # Checkerboard data: an unconstrained CRF fits it perfectly, while a
    # CRF with negativity constraints on weights 4, 5 and 6 cannot and
    # predicts what the fixed unary-only weight vector below predicts.
    X, Y = generate_checker()
    crf = GridCRF(inference_method=inference_method)

    free_clf = OneSlackSSVM(model=crf, max_iter=10, C=0.01,
                            check_constraints=True)
    free_clf.fit(X, Y)
    # the unconstrained crf predicts perfectly
    assert_array_equal(Y, free_clf.predict(X))

    submodular_clf = OneSlackSSVM(model=crf, max_iter=10, C=0.01,
                                  check_constraints=True,
                                  negativity_constraint=[4, 5, 6])
    submodular_clf.fit(X, Y)
    constrained_pred = submodular_clf.predict(X)
    assert_less(submodular_clf.score(X, Y), 0.99)

    # the constrained crf can not do better than unaries
    unary_only_w = np.array([1, 0, 0, 1, 0, 0, 0])
    for idx, sample in enumerate(X):
        from_unaries = crf.inference(sample, unary_only_w)
        assert_array_equal(from_unaries, constrained_pred[idx])
def test_one_slack_repellent_potentials():
    # Checkerboard data: an unconstrained CRF fits it perfectly, while a
    # CRF with negativity constraints on weights 4, 5 and 6 cannot and
    # predicts what the fixed unary-only weight vector below predicts.
    X, Y = generate_checker()
    crf = GridCRF(inference_method=inference_method)
    base_settings = dict(model=crf, max_iter=10, C=.01,
                         check_constraints=True)

    free_clf = OneSlackSSVM(**base_settings)
    free_clf.fit(X, Y)
    # the unconstrained crf predicts perfectly
    assert_array_equal(Y, free_clf.predict(X))

    submodular_clf = OneSlackSSVM(negativity_constraint=[4, 5, 6],
                                  **base_settings)
    submodular_clf.fit(X, Y)
    constrained_pred = submodular_clf.predict(X)
    assert_less(submodular_clf.score(X, Y), .99)

    # the constrained crf can not do better than unaries
    unary_only_w = np.array([1, 0, 0, 1, 0, 0, 0])
    for idx, sample in enumerate(X):
        from_unaries = crf.inference(sample, unary_only_w)
        assert_array_equal(from_unaries, constrained_pred[idx])
def test_switch_to_ad3():
    # Check that a learner started with qpbo inference ends up using ad3
    # when switch_to is given, and that its final objective is higher.
    if not (get_installed(['qpbo']) and get_installed(['ad3'])):
        # nothing to test unless both solvers are available
        return

    X, Y = generate_blocks_multinomial(n_samples=5, noise=1.5, seed=0)
    grid_model = GridCRF(n_states=3, inference_method='qpbo')

    plain = OneSlackSSVM(grid_model, inference_cache=50, max_iter=10000)
    switching = OneSlackSSVM(grid_model, inference_cache=50,
                             max_iter=10000, switch_to=('ad3'))
    plain.fit(X, Y)
    switching.fit(X, Y)

    assert_equal(switching.model.inference_method, 'ad3')
    # the last objective value with the switch must exceed the one without
    assert_greater(switching.objective_curve_[-1],
                   plain.objective_curve_[-1])
    print(switching.objective_curve_[-1], plain.objective_curve_[-1])
def train_structured_svm(observations, targets):
    """
    Fit a multi-label structured SVM whose label graph is fully connected.

    :param observations: our train dataset
    :param targets: multiple target variables; the number of labels is
        taken from the length of the first entry.
    :return: the structured svm model
    """
    # Ideally only the edges that really exist would be listed; for now
    # every pair of labels is connected.
    label_count = len(targets[0])
    all_pairs = list(itertools.combinations(range(label_count), 2))
    full = np.vstack(all_pairs)

    full_model = MultiLabelClf(edges=full, inference_method='lp')
    full_ssvm = OneSlackSSVM(full_model, inference_cache=50, C=.1, tol=0.01)

    train_x = np.array(observations)
    train_y = np.array(targets)
    full_ssvm.fit(train_x, train_y)
    return full_ssvm
def train_structured_svm(observations,targets):
    """
    Fit a multi-label structured SVM whose label graph is fully connected.

    :param observations: our train dataset
    :param targets: multiple target variables; the number of labels is
        taken from the length of the first entry.
    :return: the structured svm model
    """
    # Ideally only the edges that really exist would be listed; for now
    # every pair of labels is connected.
    label_count = len(targets[0])
    all_pairs = list(itertools.combinations(range(label_count), 2))
    full = np.vstack(all_pairs)

    full_model = MultiLabelClf(edges=full, inference_method='lp')
    full_ssvm = OneSlackSSVM(full_model, inference_cache=50, C=.1, tol=0.01)

    train_x = np.array(observations)
    train_y = np.array(targets)
    full_ssvm.fit(train_x, train_y)
    return full_ssvm
def CRF_pred_prepro(xtrain, y, xtest, C=0.9, weight_shift=0, max_iter=1000,
                    fs=128):
    """Fit a class-weighted ChainCRF on the training data and label ``xtest``.

    The training rows are split into 3 equally long sequences and the test
    rows into 2.  Labels are shifted by -1 before fitting and predictions
    are shifted back by +1 before they are returned.

    :param xtrain: 2-D array of training features; its row count must be
        divisible by 3 for the reshape to succeed.
    :param y: object exposing the training labels as ``y.values``.
    :param xtest: 2-D array of test features; its row count must be
        divisible by 2.
    :param C: regularisation parameter forwarded to ``OneSlackSSVM``.
    :param weight_shift: the first class weight is increased by
        ``2.5 * weight_shift`` and the second by ``1.5 * weight_shift``.
    :param max_iter: iteration limit forwarded to ``OneSlackSSVM``.
    :param fs: not used by this function; kept so existing calls still work.
    :return: 1-D array with the predicted labels of all test rows.
    """
    y_train = y.values
    # flat view of the labels for the class-weight computation
    ytrain_classes = np.reshape(y_train, (y_train.shape[0], ))

    # Reshape so that it works with CRF: 3 training sequences
    xtrain_crf = np.reshape(xtrain, (3, -1, xtrain.shape[1]))
    ytrain_crf = np.reshape(y_train, (3, -1)) - 1

    # CRF Model fitting.  `classes` and `y` are passed by keyword because
    # current scikit-learn releases no longer accept them positionally.
    classes = np.unique(y_train)
    weights_crf = compute_class_weight("balanced", classes=classes,
                                       y=ytrain_classes)
    weights_crf[0] = weights_crf[0] + (2.5 * weight_shift)
    weights_crf[1] = weights_crf[1] + (1.5 * weight_shift)

    model = ChainCRF(class_weight=weights_crf)
    ssvm = OneSlackSSVM(model=model, C=C, max_iter=max_iter)
    ssvm.fit(xtrain_crf, ytrain_crf)

    # Reshape so that it works with CRF: 2 test sequences
    xtest_crf = np.reshape(xtest, (2, -1, xtest.shape[1]))
    y_pred_crf = ssvm.predict(xtest_crf)
    # flatten the per-sequence predictions and undo the -1 label shift
    y_pred_crf = np.asarray(y_pred_crf).reshape(-1) + 1
    return y_pred_crf
def test_class_weights():
    # Inverse-frequency class weights must improve the macro F1 of the
    # multi-class SVM on imbalanced blobs.
    points, labels = make_blobs(n_samples=210, centers=3, random_state=1,
                                cluster_std=3, shuffle=False)
    # constant-1 feature appended by hand, then keep the first 170 samples
    points = np.hstack([points, np.ones((points.shape[0], 1))])
    points, labels = points[:170], labels[:170]

    plain_svm = OneSlackSSVM(MultiClassClf(n_features=3, n_classes=3),
                             C=10)
    plain_svm.fit(points, labels)

    # inverse class frequencies, rescaled so the weights sum to n_classes
    class_weights = 1. / np.bincount(labels)
    class_weights *= len(class_weights) / np.sum(class_weights)
    weighted_model = MultiClassClf(n_features=3, n_classes=3,
                                   class_weight=class_weights)
    weighted_svm = OneSlackSSVM(weighted_model, C=10)
    weighted_svm.fit(points, labels)

    f1_weighted = f1_score(labels, weighted_svm.predict(points),
                           average='macro')
    f1_plain = f1_score(labels, plain_svm.predict(points), average='macro')
    assert_greater(f1_weighted, f1_plain)
def fit_crf(self):
    """Evaluate every C in ``self.C_range``, then fit the final model.

    Each candidate is fitted on the training split and evaluated on the
    dev split through ``self.evaluate_predictions``.  The final model uses
    the fixed value C=0.03 and is fitted on train and dev together; it is
    evaluated on the test split only when ``self.eval_against_test`` is
    set.  The final learner is stored in ``self.model`` and returned.
    """
    for candidate_c in self.C_range:
        print("Testing C value: {}".format(candidate_c))
        candidate_model = EdgeFeatureGraphCRF(inference_method="ad3")
        candidate = OneSlackSSVM(candidate_model, inference_cache=50,
                                 C=candidate_c, tol=self.tol,
                                 max_iter=self.max_iter, n_jobs=4,
                                 verbose=False)
        candidate.fit(self.X_train, self.y_train)
        dev_predictions = list(candidate.predict(self.X_dev))
        self.evaluate_predictions(dev_predictions, self.y_dev)

    # Fit against the whole dataset except test.
    # (Open question from the original author: is this approach correct?)
    final_model = EdgeFeatureGraphCRF(inference_method="ad3")
    final = OneSlackSSVM(final_model, inference_cache=50, C=0.03,
                         tol=self.tol, max_iter=self.max_iter, n_jobs=4,
                         verbose=False)
    merged_x = np.concatenate([self.X_train, self.X_dev])
    merged_y = np.concatenate([self.y_train, self.y_dev])
    final.fit(merged_x, merged_y)

    if self.eval_against_test:
        test_predictions = list(final.predict(self.X_test))
        print("Test set evaluation")
        self.evaluate_predictions(test_predictions, self.y_test)

    self.model = final
    return final
def losocv_CRF(eeg1, eeg2, emg, y, C=0.5, weight_shift=0, fs=128):
    """Leave-one-subject-out cross validation for the CRF model.

    The CRF needs special data handling, hence this dedicated routine.
    Inputs should be Pandas DataFrames holding three subjects of 21600
    epochs each, one after the other.  In every fold one subject is the
    test sequence and the other two are the training sequences.

    :param eeg1: first EEG channel (``.values`` is indexed by row).
    :param eeg2: second EEG channel.
    :param emg: EMG channel.
    :param y: labels; shifted by -1 before fitting.
    :param C: regularisation parameter forwarded to ``OneSlackSSVM``.
    :param weight_shift: the first class weight is increased by
        ``2.5 * weight_shift`` and the second by ``1.5 * weight_shift``.
    :param fs: not used by this function; kept so existing calls still work.
    :return: list with the balanced accuracy of each fold.
    """
    epochs = 21600
    num_sub = 3
    # Row indices of each subject
    sub_indices = [
        np.arange(0, epochs),
        np.arange(epochs, epochs * 2),
        np.arange(epochs * 2, epochs * 3)
    ]

    res = []
    for i, test_index in enumerate(sub_indices):
        # Train on the two subjects other than subject i
        train_index = np.concatenate(
            [sub_indices[(i + 1) % num_sub], sub_indices[(i + 2) % num_sub]])
        y_train = y.values[train_index]
        y_test = y.values[test_index]

        # CRF model preprocessing (training subjects)
        eeg1_ = process_EEG(eeg1.values[train_index])
        eeg2_ = process_EEG(eeg2.values[train_index])
        emg_ = process_EMG(emg.values[train_index])
        xtrain_ = np.concatenate((eeg1_, eeg2_, emg_), axis=1)
        ytrain_classes = np.reshape(y_train, (y_train.shape[0], ))

        # CRF model preprocessing (test subject)
        eeg1_t = process_EEG(eeg1.values[test_index])
        eeg2_t = process_EEG(eeg2.values[test_index])
        emg_t = process_EMG(emg.values[test_index])
        xtest_ = np.concatenate((eeg1_t, eeg2_t, emg_t), axis=1)

        # Reshape so that it works with CRF: 2 training sequences
        xtrain_crf = np.reshape(xtrain_, (2, -1, xtrain_.shape[1]))
        ytrain_crf = np.reshape(y_train, (2, -1)) - 1

        # CRF model fitting.  `classes` and `y` are passed by keyword
        # because current scikit-learn releases no longer accept them
        # positionally.
        classes = np.unique(y_train)
        weights_crf = compute_class_weight("balanced", classes=classes,
                                           y=ytrain_classes)
        weights_crf[0] = weights_crf[0] + (2.5 * weight_shift)
        weights_crf[1] = weights_crf[1] + (1.5 * weight_shift)

        model = ChainCRF(class_weight=weights_crf)
        ssvm = OneSlackSSVM(model=model, C=C, max_iter=2000)
        ssvm.fit(xtrain_crf, ytrain_crf)

        # Test on the held-out subject: a single sequence
        xtest_crf = np.reshape(xtest_, (1, -1, xtest_.shape[1]))
        y_pred_crf = ssvm.predict(xtest_crf)
        # flatten the prediction and undo the -1 label shift
        y_pred_crf = np.asarray(y_pred_crf).reshape(-1) + 1

        resy = sklearn.metrics.balanced_accuracy_score(y_test, y_pred_crf)
        print("Iteration, result:", i, resy)
        res.append(resy)

    return res
print("Training SSVM")

inference = 'qpbo'
# first, train on X with directions only:
crf = EdgeFeatureGraphCRF(inference_method=inference)
ssvm = OneSlackSSVM(crf, inference_cache=50, C=1., tol=.1, max_iter=500,
                    n_jobs=4)

# Flatten the ground truth into one long label vector for the metrics.
Y_flat = [y_.ravel() for y_ in Y]
Y_flat = np.asarray([label for labels in Y_flat for label in labels])

ssvm.fit(X, Y)
Z_bin = ssvm.predict(X)
# Flatten the predictions the same way.
Z_flat = np.asarray([label for labels in Z_bin for label in labels])

f1_score = metrics.f1_score(Y_flat, Z_flat, average='weighted')
conf_mat = metrics.confusion_matrix(Y_flat, Z_flat)
# NOTE(review): both ratios are taken within one row of the confusion
# matrix (row 0 for TPR, row 1 for FPR); confirm this matches the intended
# definition of the positive class.
first_row, second_row = conf_mat[0], conf_mat[1]
TPR = first_row[0] / (first_row[0] + first_row[1])
FPR = second_row[0] / (second_row[0] + second_row[1])

print('Results with classifier: ' + ssvm.__class__.__name__)
print('TPR/FPR = ' + str(TPR) + '/' + str(FPR))
print('F1-score = ' + str(f1_score))

my_classifier = ssvm

# Run prediction on the img_idx-th image
img_idx = 0
the_img = imgs[img_idx]
def main():
    """Train two edge-feature CRFs on the snakes data and plot the results.

    The first model uses only directional edge features, the second the
    richer edge features returned by ``prepare_data``.  For each model the
    test accuracy and confusion matrix are printed; finally the first test
    image, its ground truth and both predictions are shown in a 2x2 figure.
    """
    print("Please be patient. Will take 5-20 minutes.")
    snakes = load_snakes()
    X_train, Y_train = snakes['X_train'], snakes['Y_train']

    X_train = [one_hot_colors(x) for x in X_train]
    Y_train_flat = [y_.ravel() for y_ in Y_train]

    X_train_directions, X_train_edge_features = prepare_data(X_train)

    # Use the 'ogm' inference backend when it is installed, QPBO otherwise.
    if 'ogm' in get_installed():
        inference = ('ogm', {'alg': 'fm'})
    else:
        inference = 'qpbo'

    # first, train on X with directions only:
    crf = EdgeFeatureGraphCRF(inference_method=inference)
    ssvm = OneSlackSSVM(crf, inference_cache=50, C=.1, tol=.1, max_iter=100,
                        n_jobs=1)
    ssvm.fit(X_train_directions, Y_train_flat)

    # Evaluate using confusion matrix.
    # Clearly the middle of the snake is the hardest part.
    X_test, Y_test = snakes['X_test'], snakes['Y_test']
    X_test = [one_hot_colors(x) for x in X_test]
    Y_test_flat = [y_.ravel() for y_ in Y_test]
    X_test_directions, X_test_edge_features = prepare_data(X_test)
    Y_pred = ssvm.predict(X_test_directions)
    print("Results using only directional features for edges")
    print("Test accuracy: %.3f"
          % accuracy_score(np.hstack(Y_test_flat), np.hstack(Y_pred)))
    print(confusion_matrix(np.hstack(Y_test_flat), np.hstack(Y_pred)))

    # now, use more informative edge features:
    crf = EdgeFeatureGraphCRF(inference_method=inference)
    ssvm = OneSlackSSVM(crf, inference_cache=50, C=.1, tol=.1,
                        switch_to='ad3', n_jobs=1)
    ssvm.fit(X_train_edge_features, Y_train_flat)
    Y_pred2 = ssvm.predict(X_test_edge_features)
    print("Results using also input features for edges")
    print("Test accuracy: %.3f"
          % accuracy_score(np.hstack(Y_test_flat), np.hstack(Y_pred2)))
    print(confusion_matrix(np.hstack(Y_test_flat), np.hstack(Y_pred2)))

    # plot stuff
    fig, axes = plt.subplots(2, 2)
    axes[0, 0].imshow(snakes['X_test'][0], interpolation='nearest')
    axes[0, 0].set_title('Input')
    # ``np.int`` was only an alias of the builtin ``int`` and is gone from
    # current NumPy releases; the builtin gives the same dtype.
    y = Y_test[0].astype(int)
    bg = 2 * (y != 0)  # enhance contrast
    axes[0, 1].matshow(y + bg, cmap=plt.cm.Greys)
    axes[0, 1].set_title("Ground Truth")
    axes[1, 0].matshow(Y_pred[0].reshape(y.shape) + bg, cmap=plt.cm.Greys)
    axes[1, 0].set_title("Prediction w/o edge features")
    axes[1, 1].matshow(Y_pred2[0].reshape(y.shape) + bg, cmap=plt.cm.Greys)
    axes[1, 1].set_title("Prediction with edge features")
    for a in axes.ravel():
        a.set_xticks(())
        a.set_yticks(())
    plt.show()
    # NOTE: a leftover ``Tracer()()`` debugger breakpoint (deprecated IPython
    # API) used to follow here; it was removed so the script terminates
    # normally after the figure is closed.
X_train_directions, X_train_edge_features = prepare_data(X_train)

# Use the 'ogm' inference backend when it is installed, QPBO otherwise.
inference = ('ogm', {'alg': 'fm'}) if 'ogm' in get_installed() else 'qpbo'

# first, train on X with directions only:
crf = EdgeFeatureGraphCRF(inference_method=inference)
ssvm = OneSlackSSVM(
    crf,
    inference_cache=50,
    C=.1,
    tol=.1,
    max_iter=100,
    n_jobs=1,
)
ssvm.fit(X_train_directions, Y_train_flat)

# Evaluate using confusion matrix.
# Clearly the middle of the snake is the hardest part.
Y_test = snakes['Y_test']
X_test = [one_hot_colors(x) for x in snakes['X_test']]
Y_test_flat = [y_.ravel() for y_ in Y_test]
X_test_directions, X_test_edge_features = prepare_data(X_test)

Y_pred = ssvm.predict(X_test_directions)
print("Results using only directional features for edges")
print(
    "Test accuracy: %.3f"
    % accuracy_score(np.hstack(Y_test_flat), np.hstack(Y_pred))
)
print(confusion_matrix(np.hstack(Y_test_flat), np.hstack(Y_pred)))

# now, use more informative edge features:
crf = EdgeFeatureGraphCRF(inference_method=inference)
def Strukturni(x_train, y_train, x_test, y_test):
    """Fit five multi-label classifiers and score them on one split.

    All four arguments are converted with ``.values`` before use.  The
    models are three pystruct ``MultiLabelClf`` structured SVMs
    (independent, fully connected, Chow-Liu tree), an ``MLPClassifier`` and
    a ``DecisionTreeClassifier``.

    Returns ``(ACC, HL, time_ST)``: three length-5 arrays holding the
    element-wise accuracy, the Hamming loss and the fit+predict wall time.
    Slot order: 0 independent SSVM, 1 full SSVM, 2 tree SSVM, 3 MLP,
    4 decision tree.
    """
    import itertools
    import time

    import numpy as np
    from scipy import sparse
    from scipy.sparse.csgraph import minimum_spanning_tree
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import hamming_loss
    from sklearn.metrics import mutual_info_score
    from sklearn.neural_network import MLPClassifier
    from sklearn.tree import DecisionTreeClassifier
    from pystruct.learners import OneSlackSSVM
    from pystruct.models import MultiLabelClf
    # from pystruct.models import GraphCRF

    x_train = x_train.values
    y_train = y_train.values
    y_test = y_test.values
    x_test = x_test.values

    def chow_liu_tree(y_):
        # Edges of the spanning tree over the negated pairwise
        # mutual-information matrix of the label columns.
        count = y_.shape[1]
        mi = np.zeros((count, count))
        for a, b in itertools.product(range(count), repeat=2):
            mi[a, b] = mutual_info_score(y_[:, a], y_[:, b])
        spanning = minimum_spanning_tree(sparse.csr_matrix(-mi))
        pairs = np.vstack(spanning.nonzero()).T
        pairs.sort(axis=1)
        return pairs

    def flatten(arr):
        # Collapse a 2-D array into one dimension.
        return arr.reshape([arr.shape[0] * arr.shape[1]])

    n_labels = y_train.shape[1]
    full = np.vstack([x for x in itertools.combinations(range(n_labels), 2)])
    tree = chow_liu_tree(y_train)

    # Define models
    full_model = MultiLabelClf(edges=full)
    independent_model = MultiLabelClf()
    tree_model = MultiLabelClf(edges=tree, inference_method='max-product')

    # Define learn algorithm
    full_ssvm = OneSlackSSVM(full_model, inference_cache=50, C=.1, tol=0.01,
                             max_iter=150)
    tree_ssvm = OneSlackSSVM(tree_model, inference_cache=50, C=.1, tol=0.01,
                             max_iter=150)
    independent_ssvm = OneSlackSSVM(independent_model, C=.1, tol=0.01,
                                    max_iter=150)
    MLP = MLPClassifier()
    DT = DecisionTreeClassifier()

    # Fit models, in this exact order; each entry is (result slot, learner).
    schedule = [
        (4, DT),
        (3, MLP),
        (0, independent_ssvm),
        (1, full_ssvm),
        (2, tree_ssvm),
    ]
    time_ST = np.zeros(5)
    predictions = [None] * 5
    for slot, learner in schedule:
        tic = time.time()
        learner.fit(x_train, y_train)
        predictions[slot] = learner.predict(x_test)
        time_ST[slot] = time.time() - tic

    # Evaluate models
    predictions = [np.asarray(pred) for pred in predictions]

    HL = np.zeros(5)
    for slot, pred in enumerate(predictions):
        HL[slot] = hamming_loss(y_test, pred)

    flat_predictions = [flatten(pred) for pred in predictions]
    y_test = flatten(y_test)

    ACC = np.zeros(5)
    for slot, pred in enumerate(flat_predictions):
        ACC[slot] = accuracy_score(y_test, pred)

    return ACC, HL, time_ST
max_iter=1000, verbose=0) fw_bc_svm = FrankWolfeSSVM(model, C=.1, max_iter=50) fw_batch_svm = FrankWolfeSSVM(model, C=.1, max_iter=50, batch_mode=True) # n-slack cutting plane ssvm start = time() n_slack_svm.fit(X_train_bias, y_train) time_n_slack_svm = time() - start y_pred = np.hstack(n_slack_svm.predict(X_test_bias)) print("Score with pystruct n-slack ssvm: %f (took %f seconds)" % (np.mean(y_pred == y_test), time_n_slack_svm)) ## 1-slack cutting plane ssvm start = time() one_slack_svm.fit(X_train_bias, y_train) time_one_slack_svm = time() - start y_pred = np.hstack(one_slack_svm.predict(X_test_bias)) print("Score with pystruct 1-slack ssvm: %f (took %f seconds)" % (np.mean(y_pred == y_test), time_one_slack_svm)) #online subgradient ssvm start = time() subgradient_svm.fit(X_train_bias, y_train) time_subgradient_svm = time() - start y_pred = np.hstack(subgradient_svm.predict(X_test_bias)) print("Score with pystruct subgradient ssvm: %f (took %f seconds)" % (np.mean(y_pred == y_test), time_subgradient_svm)) # the standard one-vs-rest multi-class would probably be as good and faster
yeast = fetch_mldata("yeast") X = yeast.data X = np.hstack([X, np.ones((X.shape[0], 1))]) y = yeast.target.toarray().astype(np.int).T X_train, X_test = X[:1500], X[1500:] y_train, y_test = y[:1500], y[1500:] else: scene = load_scene() X_train, X_test = scene['X_train'], scene['X_test'] y_train, y_test = scene['y_train'], scene['y_test'] n_labels = y_train.shape[1] full = np.vstack([x for x in itertools.combinations(range(n_labels), 2)]) tree = chow_liu_tree(y_train) #tree_model = MultiLabelClf(edges=tree, inference_method=('ogm', {'alg': 'dyn'})) tree_model = MultiLabelClf(edges=tree, inference_method='max-product') tree_ssvm = OneSlackSSVM(tree_model, inference_cache=50, C=.1, tol=0.01) print("fitting tree model...") tree_ssvm.fit(X_train, y_train) print("Training loss tree model: %f" % hamming_loss(y_train, np.vstack(tree_ssvm.predict(X_train)))) print("Test loss tree model: %f" % hamming_loss(y_test, np.vstack(tree_ssvm.predict(X_test))))
# K-fold cross-validation over the jackets: fit the structured SVM on each
# training fold, then report score, timing and number of wrong segments.
kf = KFold(num_jackets, n_folds=n_folds)
fold = 0
for train_index, test_index in kf:
    print(' ')
    print('train index {}'.format(train_index))
    print('test index {}'.format(test_index))
    print('{} jackets for training, {} for testing'.format(
        len(train_index), len(test_index)))

    X_train = X[train_index]
    Y_train = Y[train_index]
    X_test = X[test_index]
    Y_test = Y[test_index]

    # YOUR S-SVM TRAINING CODE HERE
    start = time.time()
    ssvm.fit(X_train, Y_train)
    end = time.time()
    # time.time() already measures seconds, so the difference is reported
    # as-is (it used to be divided by 1000 while still labelled "seconds").
    print('CRF learning of 1 fold has taken {} seconds'.format(end - start))

    scores_crf[fold] = ssvm.score(X_test, Y_test)
    print(np.round(end - start), 'elapsed seconds to train the model')
    print("Test score with chain CRF: %f" % scores_crf[fold])

    # Label the testing set and print results
    Y_pred = ssvm.predict(X_test)
    # Count the positions where prediction and ground truth differ.
    wrong_fold_crf = np.sum(np.ravel(Y_test) - np.ravel(Y_pred) != 0)
    wrong_segments_crf.append(wrong_fold_crf)
    print('{} wrong segments out of {}'.format(
        wrong_fold_crf, len(test_index) * num_segments_per_jacket))

    # figure showing the result of classification of segments for
    # each jacket in the testing part of present fold
def Strukturni(x_train, y_train, x_test, y_test):
    """Fit seven multi-label classifiers and score them on one split.

    All four arguments are converted with ``.values`` before use; the label
    arrays are cast to ``int``.  The feature matrix is expected to hold 18
    columns per label, stored label after label, because the GraphCRF
    inputs are built by cutting each row into ``(n_labels, 18)`` node
    features.

    Returns ``(ACC, HL, time_ST)``: three length-7 arrays holding the
    element-wise accuracy, the Hamming loss and the fit+predict wall time.
    Slot order: 0 independent SSVM, 1 full SSVM, 2 tree SSVM, 3 MLP,
    4 decision tree, 5 GraphCRF on the Chow-Liu tree, 6 GraphCRF on the
    fully connected graph.
    """
    import itertools
    import time

    import numpy as np
    from scipy import sparse
    from sklearn.metrics import hamming_loss
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import mutual_info_score
    from scipy.sparse.csgraph import minimum_spanning_tree
    from pystruct.learners import OneSlackSSVM
    # from pystruct.learners import FrankWolfeSSVM
    from pystruct.models import MultiLabelClf
    from pystruct.models import GraphCRF
    from sklearn.neural_network import MLPClassifier
    from sklearn.tree import DecisionTreeClassifier

    # Number of feature columns that belong to each label / graph node.
    n_node_features = 18

    def chow_liu_tree(y_):
        # Edges of the spanning tree over the negated pairwise
        # mutual-information matrix of the label columns.
        n_labels = y_.shape[1]
        mi = np.zeros((n_labels, n_labels))
        for i in range(n_labels):
            for j in range(n_labels):
                mi[i, j] = mutual_info_score(y_[:, i], y_[:, j])
        mst = minimum_spanning_tree(sparse.csr_matrix(-mi))
        edges = np.vstack(mst.nonzero()).T
        edges.sort(axis=1)
        return edges

    def graph_samples(x, n_samples, n_nodes, edges):
        # One (node_features, edges) pair per sample, as GraphCRF expects.
        width = n_nodes * n_node_features
        return [
            (np.array(row[:width], dtype=float).reshape(n_nodes,
                                                        n_node_features),
             edges)
            for row in x[:n_samples]
        ]

    x_train = x_train.values
    y_train = y_train.values.astype(int)
    y_test = y_test.values.astype(int)
    x_test = x_test.values

    n_labels = y_train.shape[1]
    full = np.vstack([x for x in itertools.combinations(range(n_labels), 2)])
    tree = chow_liu_tree(y_train)

    # CRF chain inputs.  Edges are passed as (n_edges, 2) arrays for BOTH
    # training and testing; the training samples used to receive the
    # transposed arrays, so the models were fitted on a different graph
    # than the one they were evaluated on.
    train_tree = graph_samples(x_train, y_train.shape[0], n_labels, tree)
    train_full = graph_samples(x_train, y_train.shape[0], n_labels, full)
    test_tree = graph_samples(x_test, y_test.shape[0], y_test.shape[1], tree)
    test_full = graph_samples(x_test, y_test.shape[0], y_test.shape[1], full)

    # Define models
    full_model = MultiLabelClf(edges=full)
    independent_model = MultiLabelClf()
    tree_model = MultiLabelClf(edges=tree, inference_method='max-product')
    modelCRF_tree = GraphCRF(directed=False, inference_method="max-product")
    modelCRF_full = GraphCRF(directed=False, inference_method="max-product")

    # Define learn algorithm
    full_ssvm = OneSlackSSVM(full_model, inference_cache=50, C=.1, tol=0.01,
                             max_iter=150)
    tree_ssvm = OneSlackSSVM(tree_model, inference_cache=50, C=.1, tol=0.01,
                             max_iter=150)
    independent_ssvm = OneSlackSSVM(independent_model, C=.1, tol=0.01,
                                    max_iter=150)
    MLP = MLPClassifier()
    DT = DecisionTreeClassifier()
    CRF_tree = OneSlackSSVM(model=modelCRF_tree, C=.1, max_iter=250)
    CRF_full = OneSlackSSVM(model=modelCRF_full, C=.1, max_iter=250)

    # Fit models: (learner, training inputs, test inputs), listed in result
    # slot order, which is also the order in which they are fitted.
    schedule = [
        (independent_ssvm, x_train, x_test),
        (full_ssvm, x_train, x_test),
        (tree_ssvm, x_train, x_test),
        (MLP, x_train, x_test),
        (DT, x_train, x_test),
        (CRF_tree, train_tree, test_tree),
        (CRF_full, train_full, test_full),
    ]
    n_models = len(schedule)
    time_ST = np.zeros(n_models)
    predictions = []
    for slot, (learner, fit_inputs, test_inputs) in enumerate(schedule):
        start_time = time.time()
        learner.fit(fit_inputs, y_train)
        predicted = learner.predict(test_inputs)
        time_ST[slot] = time.time() - start_time
        predictions.append(np.asarray(predicted))

    # Evaluate models
    HL = np.zeros(n_models)
    ACC = np.zeros(n_models)
    y_test_flat = y_test.reshape([y_test.shape[0] * y_test.shape[1]])
    for slot, predicted in enumerate(predictions):
        HL[slot] = hamming_loss(y_test, predicted)
        # Accuracy is measured per label, hence on the flattened arrays.
        # Slots 5 and 6 now score the GraphCRF predictions; they used to
        # repeat the MLP and decision-tree accuracies.
        flat = predicted.reshape([predicted.shape[0] * predicted.shape[1]])
        ACC[slot] = accuracy_score(y_test_flat, flat)

    return ACC, HL, time_ST
# [Note: if you get an error on the below line, it may be because you need to upgrade scikit-learn]
# Represent features using one-of-K scheme: If a feature can take value in
# {0,...,K}, then introduce K binary features such that the value of only
# the i^th binary feature is non-zero when the feature takes value 'i'.
# n_values specifies the number of states each feature can take.
encoder = OneHotEncoder(n_values=[1, 2, 2, 201, 201],
                        sparse=False).fit(np.vstack(X))
X_encoded = [encoder.transform(x) for x in X]

# Pick the first 100 samples from the encoded training set.
X_small, y_small = X_encoded[:100], y[:100]

# See: http://pystruct.github.io/generated/pystruct.learners.OneSlackSSVM.html
# See: http://pystruct.github.io/generated/pystruct.models.ChainCRF.html
# Rest of documentation can be found here: http://pystruct.github.io/references.html

# Construct a directed ChainCRF with 10 states for each variable,
# and pass this CRF to OneSlackSSVM constructor to create an object 'ssvm'
ssvm = OneSlackSSVM(ChainCRF(n_states=10,
                             inference_method='max-product',
                             directed=True),
                    max_iter=200,
                    C=1)

# Learn Structured SVM using X_small and y_small
ssvm.fit(X_small, y_small)

# Store learnt weights in 'weights'
weights = ssvm.w

# The call form of print gives the same output on Python 2 for a single
# argument and is the only valid form on Python 3.
# Evaluate training accuracy on X_small, y_small
print(ssvm.score(X_small, y_small))
# Get predicted labels on X_small using the learnt model
print(ssvm.predict(X_small))
check_constraints=False, max_iter=100, tol=0.001, inference_cache=50) subgradient_svm = SubgradientSSVM(crf, learning_rate=0.001, max_iter=20, decay_exponent=0, momentum=0) bcfw_svm = FrankWolfeSSVM(crf, max_iter=50, check_dual_every=4) #n-slack cutting plane ssvm n_slack_svm.fit(X, Y) # 1-slack cutting plane ssvm one_slack_svm.fit(X, Y) # online subgradient ssvm subgradient_svm.fit(X, Y) # Block coordinate Frank-Wolfe bcfw_svm.fit(X, Y) # don't plot objective from chached inference for 1-slack inference_run = ~np.array(one_slack_svm.cached_constraint_) time_one = np.array(one_slack_svm.timestamps_[1:])[inference_run] # plot stuff plt.plot(n_slack_svm.timestamps_[1:], n_slack_svm.objective_curve_, label="n-slack cutting plane")
encoder = OneHotEncoder(n_values=[1, 2, 2, 201, 201], sparse=False).fit(np.vstack(X)) # FROM SAMPLE #Represent features using one-of-K scheme: If a feature can take value in X_encoded = [ encoder.transform(x) for x in X ] # FROM SAMPLE #{0,...,K}, then introduce K binary features such that the value of only return X_encoded, y, sentences # FROM SAMPLE #the i^th binary feature is non-zero when the feature takes value 'i'. X_train, Y_train, TrainSent = ReadData("train") best_C = 0.1 crf = ChainCRF(n_states=10, inference_method="max-product", directed=True) ssvm = OneSlackSSVM(crf, max_iter=200, C=best_C) ssvm.fit(X_train[:4500], Y_train[:4500]) error = 1 - ssvm.score(X_train[-500:], Y_train[-500:]) tag = np.array([ 'verb', 'noun', 'adjective', 'adverb', 'preposition', 'pronoun', 'determiner', 'number', 'punctuation', 'other' ]) cl = random.sample(range(10), 3) print('Chosen classes: ', tag[cl]) trans_matrix = np.reshape(ssvm.w[-10 * 10:], (10, 10)) pairs = list(itertools.combinations(cl, 2)) for pair in pairs: print(tag[pair[0]], "->", tag[pair[1]], trans_matrix[pair[0]][pair[1]]) print(tag[pair[1]], "->", tag[pair[0]], trans_matrix[pair[1]][pair[0]])
# Train a GraphCRF with relaxed LP inference, predict on the test set and
# optionally pickle the fitted learner.  Timestamps are printed before and
# after so the run time can be read off the log.

# model = GraphCRF(directed=True, inference_method="ad3")
print(datetime.datetime.now())
model = GraphCRF(directed=True, inference_method=('lp', {'relaxed': True}))

# Alternative learner that was tried before (block-coordinate Frank-Wolfe):
# ssvm = FrankWolfeSSVM(model=model, C=.1, max_iter=50)
# predict_result = ssvm.predict(test_set)
# ssvm.fit(train_set, train_label)
# score = ssvm.score(test_set, test_label)

# Use a one-slack SSVM learner
from pystruct.learners import OneSlackSSVM
learner = OneSlackSSVM(model=model, C=.02, max_iter=10)
learner.fit(train_set, train_label)
lp_probs = learner.predict(test_set)
# print(lp_probs[0][0][:,1])
print(datetime.datetime.now())

# pickle trained crf
if pickle_overwrite:
    # The context manager closes the file even when pickling fails.
    with open(file_model, 'wb') as fileObject:
        pickle.dump(learner, fileObject)
from sklearn.datasets import load_iris
from sklearn.svm import LinearSVC

from pystruct.models import CrammerSingerSVMModel
from pystruct.learners import OneSlackSSVM

# Load three class iris data.
iris = load_iris()
X = iris.data
y = iris.target

# PyStruct interface: Crammer-Singer model trained with the one-slack SSVM.
model = CrammerSingerSVMModel()
one_slack_svm = OneSlackSSVM(model)
one_slack_svm.fit(X, y)

# scikit-learn interface for liblinear, using the same multi-class scheme.
libsvm = LinearSVC(multi_class='crammer_singer')
libsvm.fit(X, y)
crf, inference_cache=50, C=.1, tol=.1, # max_iter=MAXITER, n_jobs=N_JOBS #,verbose=1 , switch_to='ad3') Y_train_flat = [y_.ravel() for y_ in Y_train] print "\ttrain label histogram : ", np.histogram( np.hstack(Y_train_flat), bins=range(NCELL + 2)) t0 = time.time() ssvm.fit(X_train_edge_features, Y_train_flat) print "FIT DONE IN %.1fs" % (time.time() - t0) sys.stdout.flush() t0 = time.time() _Y_pred = ssvm.predict(X_test_edge_features) REPORT(Y_test, _Y_pred, time.time() - t0, NCELL, "gen_singletype_%d.csv" % nbSample, True, "singletype_%d" % nbSample) _Y_pred = ssvm.predict(X_test_gen_edge_features) REPORT(Y_test_gen, _Y_pred, None, NCELL, "gen_singletype_gentest_%d.csv" % nbSample, True, "singletype_%d_gentest" % nbSample) #--------------------------------------------------------------------------------------------------
running_sum = 0 for index in index_list: running_sum += index new_index_list.append(running_sum) new_index_list = pair(new_index_list, 2) new_index_list.insert(0, (0, new_index_list[0][0])) Xtest_scale = [] for num in new_index_list: new_array = Xtest_scaler[num[0]:num[1], :] Xtest_scale.append(new_array) Xtest_scale = np.array(Xtest_scale) print Xtest_scale.shape print 'Test set scaled' ssvm.fit(Xtrain_scale, y_train) print 'Model fit' # Storing model weights for plot of transition probabilities if type(transition_states) == str: transition_states = ssvm.w[-49:].reshape(7, 7) else: transition_states = transition_states + ssvm.w[-49:].reshape(7, 7) predicted = ssvm.predict(Xtest_scale) print 'Predictions made' # Metrics fold_cm = confusion_matrix(np.hstack(y_test), np.hstack(predicted)) if type(total_confusion_matrix) == str: total_confusion_matrix = fold_cm else:
X_train = [one_hot_colors(x) for x in X_train] Y_train_flat = [y_.ravel() for y_ in Y_train] X_train_directions, X_train_edge_features = prepare_data(X_train) inference = 'ad3+' # first, train on X with directions only: crf = NodeTypeEdgeFeatureGraphCRF(1, [11], [45], [[2]], inference_method=inference) ssvm = OneSlackSSVM(crf, inference_cache=50, C=.1, tol=.1, max_iter=100, n_jobs=1) ssvm.fit(convertToSingleTypeX(X_train_directions), Y_train_flat) # Evaluate using confusion matrix. # Clearly the middel of the snake is the hardest part. X_test, Y_test = snakes['X_test'], snakes['Y_test'] X_test = [one_hot_colors(x) for x in X_test] Y_test_flat = [y_.ravel() for y_ in Y_test] X_test_directions, X_test_edge_features = prepare_data(X_test) Y_pred = ssvm.predict(convertToSingleTypeX(X_test_directions)) print("Results using only directional features for edges") print("Test accuracy: %.3f" % accuracy_score(np.hstack(Y_test_flat), np.hstack(Y_pred))) print(confusion_matrix(np.hstack(Y_test_flat), np.hstack(Y_pred))) # now, use more informative edge features: crf = NodeTypeEdgeFeatureGraphCRF(1, [11], [45], [[180]],
from sklearn.utils import shuffle

from pystruct.problems import CrammerSingerSVMProblem
#from pystruct.learners import SubgradientStructuredSVM
#from pystruct.learners import StructuredSVM
from pystruct.learners import OneSlackSSVM

# Load MNIST and scale the pixel values by 1/255.
mnist = fetch_mldata("MNIST original")
X = mnist.data / 255.
y = mnist.target

# The first 60000 samples are used for training, the rest for testing.
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]
X_train, y_train = shuffle(X_train, y_train)

pblm = CrammerSingerSVMProblem(n_classes=10, n_features=28**2)

# Learners tried previously:
#svm = SubgradientStructuredSVM(pblm, verbose=10, n_jobs=1, plot=True,
#max_iter=10, batch=False, learning_rate=0.0001,
#momentum=0)
#svm = SubgradientStructuredSVM(pblm, verbose=10, n_jobs=1, plot=True,
#max_iter=2, batch=False, momentum=.9,
#learning_rate=0.001, show_loss='true', C=1000)
#svm = StructuredSVM(pblm, verbose=50, n_jobs=1, plot=True, max_iter=10,
#C=1000)
svm = OneSlackSSVM(
    pblm,
    verbose=2,
    n_jobs=1,
    plot=True,
    max_iter=2,
    C=1000,
)
svm.fit(X_train, y_train)

# Report the score on the training set, then on the test set.
print(svm.score(X_train, y_train))
print(svm.score(X_test, y_test))
class SSVM:
    """Structured SVM wrapper.

    Builds node and edge features for POI trajectories, scales them to
    [-1, 1], trains a ``OneSlackSSVM`` on a ``MyModel`` and predicts
    trajectories from a start POI and a length.
    """

    def __init__(self, inference_train, inference_pred, dat_obj, C=1.0,
                 share_params=True, multi_label=True, poi_info=None,
                 debug=False):
        """Store the configuration and create the (unfitted) scalers.

        inference_train / inference_pred, share_params and multi_label are
        forwarded to ``MyModel`` in ``train``; ``C`` (must be positive) is
        forwarded to ``OneSlackSSVM``.  ``dat_obj`` provides ``traj_dict``
        and ``calc_poi_info``.  When ``poi_info`` is None it is computed
        from the training trajectories in ``train``.
        """
        assert (C > 0)
        self.C = C
        self.inference_train = inference_train
        self.inference_pred = inference_pred
        self.share_params = share_params
        self.multi_label = multi_label
        self.dat_obj = dat_obj
        self.debug = debug
        self.trained = False
        self.poi_info = poi_info

        self.scaler_node = MinMaxScaler(feature_range=(-1, 1), copy=False)
        self.scaler_edge = MinMaxScaler(feature_range=(-1, 1), copy=False)

    def train(self, trajid_list, n_jobs=4):
        """Fit the structured SVM on the given trajectories.

        Only trajectories with at least two POIs are used.  Returns True
        when training succeeded and False when fitting raised an exception
        (a message is then written to stderr).
        """
        if self.poi_info is None:
            self.poi_info = self.dat_obj.calc_poi_info(trajid_list)

        # build POI_ID <--> POI__INDEX mapping for POIs used to train CRF
        # which means only POIs in traj such that len(traj) >= 2 are included
        poi_set = {
            p
            for tid in trajid_list for p in self.dat_obj.traj_dict[tid]
            if len(self.dat_obj.traj_dict[tid]) >= 2
        }
        self.poi_list = sorted(poi_set)
        self.poi_id_dict, self.poi_id_rdict = dict(), dict()
        for idx, poi in enumerate(self.poi_list):
            self.poi_id_dict[poi] = idx
            self.poi_id_rdict[idx] = poi

        # generate training data
        train_traj_list = [
            self.dat_obj.traj_dict[k] for k in trajid_list
            if len(self.dat_obj.traj_dict[k]) >= 2
        ]
        node_features_list = Parallel(n_jobs=n_jobs)(
            delayed(calc_node_features)(tr[0], len(tr), self.poi_list,
                                        self.poi_info, self.dat_obj)
            for tr in train_traj_list)
        edge_features = calc_edge_features(trajid_list, self.poi_list,
                                           self.poi_info, self.dat_obj)

        # feature scaling: node features
        # should each example be flattened to one vector before scaling?
        self.fdim_node = node_features_list[0].shape
        X_node_all = np.vstack(node_features_list)
        X_node_all = self.scaler_node.fit_transform(X_node_all)
        X_node_all = X_node_all.reshape(-1, self.fdim_node[0],
                                        self.fdim_node[1])

        # feature scaling: edge features
        # (the first two axes are merged for scaling, then restored)
        fdim_edge = edge_features.shape
        edge_features = self.scaler_edge.fit_transform(
            edge_features.reshape(fdim_edge[0] * fdim_edge[1], -1))
        self.edge_features = edge_features.reshape(fdim_edge)

        assert (len(train_traj_list) == X_node_all.shape[0])
        # Each sample: (node features, edge features, (start index, length)).
        X_train = [(X_node_all[k, :, :], self.edge_features.copy(),
                    (self.poi_id_dict[train_traj_list[k][0]],
                     len(train_traj_list[k])))
                   for k in range(len(train_traj_list))]
        y_train = [
            np.array([self.poi_id_dict[k] for k in tr])
            for tr in train_traj_list
        ]
        assert (len(X_train) == len(y_train))

        # train
        sm = MyModel(inference_train=self.inference_train,
                     inference_pred=self.inference_pred,
                     share_params=self.share_params,
                     multi_label=self.multi_label)
        if self.debug is True:
            print('C:', self.C)
        verbose = 1 if self.debug is True else 0
        self.osssvm = OneSlackSSVM(model=sm,
                                   C=self.C,
                                   n_jobs=n_jobs,
                                   verbose=verbose)
        try:
            self.osssvm.fit(X_train, y_train, initialize=True)
        except Exception as exc:
            # Training failures are reported, not raised (callers check the
            # return value).  ``Exception`` instead of a bare ``except`` lets
            # KeyboardInterrupt / SystemExit propagate.
            self.trained = False
            sys.stderr.write('SSVM training FAILED.\n')
            if self.debug is True:
                sys.stderr.write('  cause: %r\n' % (exc,))
        else:
            self.trained = True
            print('SSVM training finished.')
        return self.trained

    def predict(self, startPOI, nPOI):
        """Predict trajectories of ``nPOI`` POIs starting at ``startPOI``.

        Requires a successfully trained model.  Returns None when
        ``startPOI`` was not among the training POIs, otherwise a list of
        arrays of POI IDs (one array per predicted label sequence).
        """
        assert (self.trained is True)
        if startPOI not in self.poi_list:
            return None
        X_node_test = calc_node_features(startPOI, nPOI, self.poi_list,
                                         self.poi_info, self.dat_obj)

        # feature scaling
        # should each example be flattened to one vector before scaling?
        # X_node_test = X_node_test.reshape(1, -1)  # flatten test example to a vector
        X_node_test = self.scaler_node.transform(X_node_test)
        # X_node_test = X_node_test.reshape(self.fdim)

        X_test = [(X_node_test, self.edge_features,
                   (self.poi_id_dict[startPOI], nPOI))]
        y_hat_list = self.osssvm.predict(X_test)[0]
        # print(y_hat_list)

        # Map POI indices back to POI IDs.
        return [
            np.array([self.poi_id_rdict[x] for x in y_hat])
            for y_hat in y_hat_list
        ]
# Keep the first 4500 samples for training.
X_train, Y_train = X_train[:4500], Y_train[:4500]

crf = ChainCRF(n_states=10, inference_method='max-product', directed=True)

# Candidate C values: 1e-4 ... 1e2 together with 5 times each of them.
l1 = [10**i for i in range(-4, 3)]
l1 += [5 * l for l in l1]
Cs = sorted(l1)

error = dict()
best_C = dict()
Train_Sizes = [100, 200, 500, 1000, 4500]

# For every training-set size pick the C with the best validation score.
for b in Train_Sizes:
    score = dict()
    for C in Cs:
        ssvm = OneSlackSSVM(crf, max_iter=200, C=C)
        ssvm.fit(X_train[:b], Y_train[:b])
        score[C] = ssvm.score(X_val, Y_val)
        print('b = ', b, 'C = ', C, ' : ', score[C])
    best_C[b] = max(score, key=score.get)
    # NOTE(review): this entry is stored and plotted as 'train', but the
    # score comes from (X_val, Y_val) - confirm which one is intended.
    error[('train', b)] = 1. - score[best_C[b]]

# Refit with the selected C and measure the test error.
for b in Train_Sizes:
    ssvm = OneSlackSSVM(crf, max_iter=200, C=best_C[b])
    ssvm.fit(X_train[:b], Y_train[:b])
    error[('test', b)] = 1. - ssvm.score(X_test, Y_test)

plt.xlabel('Size of the training set')
plt.ylabel('Error')
plt.plot(Train_Sizes, [error[('train', b)] for b in Train_Sizes],
         label='train')
plt.plot(Train_Sizes, [error[('test', b)] for b in Train_Sizes],
         label='test')
plt.legend()
vfnumpypath = "../vflabelnumpy/" Xval = np.loadtxt(vfnumpypath + "Xval.txt") Yval = np.loadtxt(vfnumpypath + "Yval.txt", dtype="int") print("val end export") Xtrain = np.loadtxt(vfnumpypath + "Xtrain.txt") Ytrain = np.loadtxt(vfnumpypath + "Ytrain.txt", dtype="int") print("train end export") #independent Model independent_model = MultiLabelClf(inference_method='unary') independent_ssvm = OneSlackSSVM(independent_model, C=.1, tol=0.01) print("fitting independent model...") independent_ssvm.fit(Xtrain, Ytrain) #print np.vstack(independent_ssvm.predict(Xval))[1,:] print("Test exact matching ratio: %f" % check_exactmatchratio( Yval, np.vstack(independent_ssvm.predict(Xval)), datatotest)) print( f1_score(Yval[3, :], np.vstack(independent_ssvm.predict(Xtrain))[3, :], average='macro')) ''' print("Training loss independent model: %f" % hamming_loss(Ytrain, np.vstack(independent_ssvm.predict(Xtrain)))) print("Test loss independent model: %f" % hamming_loss(Yval, np.vstack(independent_ssvm.predict(Xval))))
n_labels = y_train.shape[1]
# Edge sets: every pair of labels, and the tree returned by chow_liu_tree.
full = np.vstack(list(itertools.combinations(range(n_labels), 2)))
tree = chow_liu_tree(y_train)

full_model = MultiLabelClf(edges=full, inference_method='qpbo')
independent_model = MultiLabelClf(inference_method='unary')
tree_model = MultiLabelClf(edges=tree, inference_method="max-product")

full_ssvm = OneSlackSSVM(full_model, inference_cache=50, C=.1, tol=0.01)
tree_ssvm = OneSlackSSVM(tree_model, inference_cache=50, C=.1, tol=0.01)
independent_ssvm = OneSlackSSVM(independent_model, C=.1, tol=0.01)

# Fit the three learners one after the other, announcing each.
for announcement, learner in (
        ("fitting independent model...", independent_ssvm),
        ("fitting full model...", full_ssvm),
        ("fitting tree model...", tree_ssvm)):
    print(announcement)
    learner.fit(X_train, y_train)

# Hamming loss on train and test data for the independent and tree models.
for template, learner, inputs, targets in (
        ("Training loss independent model: %f", independent_ssvm,
         X_train, y_train),
        ("Test loss independent model: %f", independent_ssvm,
         X_test, y_test),
        ("Training loss tree model: %f", tree_ssvm, X_train, y_train),
        ("Test loss tree model: %f", tree_ssvm, X_test, y_test)):
    print(template % hamming_loss(targets, np.vstack(learner.predict(inputs))))