def test_latent_node_boxes_latent_subgradient():
    # same as above, now with elementary subgradients
    # learn the "easy" 2x2 boxes dataset.
    # a 2x2 box is placed randomly in a 4x4 grid
    # we add a latent variable for each 2x2 patch
    # that should make the model fairly simple
    X, Y = toy.make_simple_2x2(seed=1)
    latent_crf = LatentNodeCRF(n_labels=2, inference_method='lp',
                               n_hidden_states=2, n_features=1)
    latent_svm = LatentSubgradientSSVM(model=latent_crf, max_iter=250, C=10,
                                       verbose=10, learning_rate=0.1,
                                       momentum=0)

    G = [make_grid_edges(x) for x in X]

    # make edges for hidden states:
    edges = []
    node_indices = np.arange(4 * 4).reshape(4, 4)
    for i, (x, y) in enumerate(itertools.product([0, 2], repeat=2)):
        for j in xrange(x, x + 2):
            for k in xrange(y, y + 2):
                edges.append([i + 4 * 4, node_indices[j, k]])

    G = [np.vstack([make_grid_edges(x), edges]) for x in X]

    # reshape / flatten x and y
    X_flat = [x.reshape(-1, 1) for x in X]
    Y_flat = [y.ravel() for y in Y]

    X_ = zip(X_flat, G, [4 * 4 for x in X_flat])
    latent_svm.fit(X_, Y_flat)

    assert_equal(latent_svm.score(X_, Y_flat), 1)
def main():
    X, Y = toy.generate_crosses(n_samples=20, noise=5, n_crosses=1,
                                total_size=8)
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.5)

    n_labels = len(np.unique(Y_train))
    crf = LatentGridCRF(n_labels=n_labels, n_states_per_label=[1, 2],
                        inference_method='lp')
    #clf = LatentSSVM(model=crf, max_iter=500, C=1000., verbose=2,
                     #check_constraints=True, n_jobs=-1, break_on_bad=True,
                     #base_svm='1-slack', inference_cache=20, tol=.1)
    clf = LatentSubgradientSSVM(
        model=crf, max_iter=500, C=1000., verbose=2, n_jobs=-1,
        learning_rate=0.1, show_loss_every=10)
    clf.fit(X_train, Y_train)

    #for X_, Y_, H, name in [[X_train, Y_train, clf.H_init_, "train"],
                            #[X_test, Y_test, [None] * len(X_test), "test"]]:
    for X_, Y_, H, name in [[X_train, Y_train, [None] * len(X_train), "train"],
                            [X_test, Y_test, [None] * len(X_test), "test"]]:
        Y_pred = clf.predict(X_)
        i = 0
        loss = 0
        for x, y, h_init, y_pred in zip(X_, Y_, H, Y_pred):
            loss += np.sum(y != y_pred)
            fig, ax = plt.subplots(3, 2)
            ax[0, 0].matshow(y, vmin=0, vmax=crf.n_labels - 1)
            ax[0, 0].set_title("ground truth")
            ax[0, 1].matshow(np.argmax(x, axis=-1),
                             vmin=0, vmax=crf.n_labels - 1)
            ax[0, 1].set_title("unaries only")
            if h_init is None:
                ax[1, 0].set_visible(False)
            else:
                ax[1, 0].matshow(h_init, vmin=0, vmax=crf.n_states - 1)
                ax[1, 0].set_title("latent initial")
            ax[1, 1].matshow(crf.latent(x, y, clf.w),
                             vmin=0, vmax=crf.n_states - 1)
            ax[1, 1].set_title("latent final")
            ax[2, 0].matshow(crf.inference(x, clf.w),
                             vmin=0, vmax=crf.n_states - 1)
            ax[2, 0].set_title("prediction latent")
            ax[2, 1].matshow(y_pred, vmin=0, vmax=crf.n_labels - 1)
            ax[2, 1].set_title("prediction")
            for a in ax.ravel():
                a.set_xticks(())
                a.set_yticks(())
            fig.savefig("data_%s_%03d.png" % (name, i), bbox_inches="tight")
            i += 1
        print("loss %s set: %f" % (name, loss))

    print(clf.w)
def test_directional_bars():
    for inference_method in ['lp']:
        X, Y = toy.generate_easy(n_samples=10, noise=5, box_size=2,
                                 total_size=6, seed=1)
        n_labels = 2
        crf = LatentDirectionalGridCRF(n_labels=n_labels,
                                       n_states_per_label=[1, 4],
                                       inference_method=inference_method)
        clf = LatentSubgradientSSVM(model=crf, max_iter=500, C=10. ** 5,
                                    verbose=2)
        clf.fit(X, Y)
        Y_pred = clf.predict(X)
        assert_array_equal(np.array(Y_pred), Y)
def test_directional_bars():
    # this test is very fragile :-/
    X, Y = toy.generate_easy(n_samples=20, noise=2, box_size=2,
                             total_size=6, seed=2)
    n_labels = 2
    crf = LatentDirectionalGridCRF(n_labels=n_labels,
                                   n_states_per_label=[1, 4])
    clf = LatentSubgradientSSVM(model=crf, max_iter=75, C=10.,
                                learning_rate=1, momentum=0,
                                decay_exponent=0.5, decay_t0=10)
    clf.fit(X, Y)
    Y_pred = clf.predict(X)
    assert_array_equal(np.array(Y_pred), Y)
def test_objective():
    # test that LatentSubgradientSSVM does the same as SubgradientSSVM,
    # in particular that it has the same loss, if there are no latent states.
    X, Y = toy.generate_blocks_multinomial(n_samples=10)
    n_labels = 3
    crfl = LatentGridCRF(n_labels=n_labels, n_states_per_label=1)
    clfl = LatentSubgradientSSVM(model=crfl, max_iter=50, C=10.,
                                 learning_rate=0.001, momentum=0.98,
                                 decay_exponent=0)
    clfl.w = np.zeros(crfl.size_psi)  # this disables random init
    clfl.fit(X, Y)

    crf = GridCRF(n_states=n_labels)
    clf = SubgradientSSVM(model=crf, max_iter=50, C=10., learning_rate=0.001,
                          momentum=0.98, decay_exponent=0)
    clf.fit(X, Y)

    assert_array_almost_equal(clf.w, clfl.w)
    assert_array_equal(clf.predict(X), Y)
    assert_almost_equal(clf.objective_curve_[-1], clfl.objective_curve_[-1])
def test_with_crosses():
    # very simple dataset. k-means init is perfect
    for n_states_per_label in [2, [1, 2]]:
        # test with 2 states for both foreground and background,
        # as well as with single background state
        #for inference_method in ['ad3', 'qpbo', 'lp']:
        for inference_method in ['lp']:
            X, Y = toy.generate_crosses(n_samples=10, noise=5, n_crosses=1,
                                        total_size=8)
            n_labels = 2
            crf = LatentGridCRF(n_labels=n_labels,
                                n_states_per_label=n_states_per_label,
                                inference_method=inference_method)
            clf = LatentSubgradientSSVM(model=crf, max_iter=250, C=10. ** 5,
                                        verbose=20, learning_rate=0.0001,
                                        show_loss_every=10, momentum=0.98,
                                        decay_exponent=0)
            clf.fit(X, Y)
            Y_pred = clf.predict(X)
            assert_array_equal(np.array(Y_pred), Y)
def test_latent_node_boxes_latent_subgradient():
    # same as above, now with elementary subgradients
    X, Y = toy.make_simple_2x2(seed=1)
    latent_crf = LatentNodeCRF(n_labels=2, n_hidden_states=2, n_features=1)
    latent_svm = LatentSubgradientSSVM(model=latent_crf, max_iter=250, C=10,
                                       learning_rate=0.1, momentum=0)

    G = [make_grid_edges(x) for x in X]

    edges = make_edges_2x2()
    G = [np.vstack([make_grid_edges(x), edges]) for x in X]

    # reshape / flatten x and y
    X_flat = [x.reshape(-1, 1) for x in X]
    Y_flat = [y.ravel() for y in Y]

    X_ = zip(X_flat, G, [4 * 4 for x in X_flat])
    latent_svm.fit(X_, Y_flat)

    assert_equal(latent_svm.score(X_, Y_flat), 1)
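# make_edges_2x2 is not defined in this excerpt. A minimal sketch of what it
# presumably returns, based on the inline edge construction used in the first
# test above: each of the four hidden nodes (indices 16..19) is connected to
# the four pixels of one 2x2 patch of the 4x4 grid.
import itertools

import numpy as np


def make_edges_2x2():
    # hidden node i (at index 16 + i) covers the 2x2 patch starting at (x, y)
    edges = []
    node_indices = np.arange(4 * 4).reshape(4, 4)
    for i, (x, y) in enumerate(itertools.product([0, 2], repeat=2)):
        for j in range(x, x + 2):
            for k in range(y, y + 2):
                edges.append([i + 4 * 4, node_indices[j, k]])
    return edges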
X_train_, X_test_, X_train, X_test, y_train, y_test, y_org_train, y_org_test =\
    train_test_split(X_, X, Y, y_org, test_size=.5)

# first, do it with a standard CRF / SVM
pbl = GraphCRF(n_features=64, n_states=2, inference_method='lp')
svm = StructuredSVM(pbl, verbose=1, check_constraints=True, C=1000, n_jobs=1,
                    batch_size=-1)

svm.fit(X_train_, y_train)
y_pred = np.vstack(svm.predict(X_test_))
print("Score with pystruct crf svm: %f" % np.mean(y_pred == y_test))
print(svm.score(X_train_, y_train))
print(svm.score(X_test_, y_test))

# now with latent CRF SVM
latent_pbl = LatentGraphCRF(n_features=64, n_labels=2, n_states_per_label=5,
                            inference_method='dai')
latent_svm = LatentSubgradientSSVM(model=latent_pbl, max_iter=5000, C=1,
                                   verbose=2, n_jobs=1, learning_rate=0.1,
                                   show_loss_every=10, momentum=0.0,
                                   decay_exponent=0.5)
#latent_svm = LatentSSVM(latent_pbl, verbose=2, check_constraints=True, C=100,
                        #n_jobs=1, batch_size=-1, tol=.1, latent_iter=2)
latent_svm.fit(X_train_, y_train)
print(latent_svm.score(X_train_, y_train))
print(latent_svm.score(X_test_, y_test))

h_pred = np.hstack(latent_svm.predict_latent(X_test_))
print("Latent class counts: %s" % repr(np.bincount(h_pred)))
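# The variables X, Y, y_org and the graph inputs X_ are built earlier in this
# example and are not shown here. A sketch of one plausible construction,
# assuming the 8x8 digits data (suggested by n_features=64) and a hypothetical
# binary relabeling: each sample becomes a single-node graph, i.e. a
# (features, edges) pair with a (1, 64) feature row and an empty edge array,
# which is the input format GraphCRF expects.
import numpy as np
from sklearn.datasets import load_digits

digits = load_digits()
X = digits.data / 16.             # 64 pixel intensities per sample, scaled
y_org = digits.target             # original 10-class labels
Y = (y_org % 2).reshape(-1, 1)    # hypothetical 2-class relabeling (odd/even)

# one single-node graph per sample: (features, edges) with no edges
X_ = [(x[np.newaxis, :], np.empty((0, 2), dtype=int)) for x in X]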
G = [make_grid_edges(x) for x in X]

asdf = zip(X_flat, G)
svm.fit(asdf, Y_flat)
plot_boxes(svm.predict(asdf))
print("Training score multiclass svm CRF: %f" % svm.score(asdf, Y_flat))

# using one latent variable for each 2x2 rectangle
latent_crf = LatentNodeCRF(n_labels=2, n_features=1, inference_method='lp',
                           n_hidden_states=2)
#latent_svm = LatentSSVM(model=latent_crf, max_iter=200, C=10, verbose=10,
                        #check_constraints=True, break_on_bad=True, n_jobs=1,
                        #latent_iter=10, base_svm='subgradient', tol=-1,
                        #inactive_window=0, learning_rate=0.01, momentum=0)
latent_svm = LatentSubgradientSSVM(model=latent_crf, max_iter=200, C=100,
                                   verbose=1, n_jobs=1, show_loss_every=10,
                                   learning_rate=0.01, momentum=0)

# make edges for hidden states:
edges = []
node_indices = np.arange(4 * 4).reshape(4, 4)
for i, (x, y) in enumerate(itertools.product([0, 2], repeat=2)):
    for j in xrange(x, x + 2):
        for k in xrange(y, y + 2):
            edges.append([i + 4 * 4, node_indices[j, k]])

G = [np.vstack([make_grid_edges(x), edges]) for x in X]
#G = [make_grid_edges(x) for x in X]

#H_init = [np.hstack([y.ravel(), 2 + y[1: -1, 1: -1].ravel()]) for y in Y]
H_init = [np.hstack([y.ravel(), np.random.randint(2, 4, size=2 * 2)])
          for y in Y]
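# H_init stacks the ground-truth labels of the 16 visible nodes with random
# initial states in {2, 3} for the four hidden nodes. Presumably it is later
# passed to the latent learner as an initial latent assignment, but that call
# is not part of this excerpt.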