def test_compute_embedding(check_asserts=True):
    """Embed the 8x8 digits dataset in 2D with an SDAEmbedder and check
    that the kNN neighborhood-match score improves over a random projection.

    NOTE(review): the ``check_asserts`` flag is currently unused in the body.
    """
    # fixed seeds so the projection, sampling and training are reproducible
    np.random.seed(0)
    random.seed(0)

    # sample data from the digits 8x8 pixels dataset
    digits = datasets.load_digits()
    data = digits.data
    n_samples, n_features = data.shape
    low_dim = 2

    # baseline score using a random 2D projection
    projection = random_project(data, target_dim=2, rng=np.random)
    score = local_match(data, projection, query_size=50, ratio=0.1, seed=0)
    assert_almost_equal(score, 0.12, 2)

    # compute an embedding of the data
    embedder = SDAEmbedder(
        (n_features, 40, 15, low_dim),
        noise=0.1,
        reconstruction_penalty=0.0,
        embedding_penalty=1,
        sparsity_penalty=0.0,
        learning_rate=0.1,
        seed=0,
    )
    embedder.pre_train(data, epochs=500, batch_size=5)

    code = embedder.encode(data)
    assert_equal(code.shape, (n_samples, low_dim))

    # compare nearest neighbors: the learned code should match the data's
    # local structure much better than the random-projection baseline
    score = local_match(data, code, query_size=50, ratio=0.1, seed=0)
    assert_almost_equal(score, 0.33, 1)
# Generate a swissroll dataset and score how well kNN neighborhoods in the
# input space match the known 2D manifold parametrization (ground-truth
# upper bound for any embedding).
# NOTE: n_features / n_samples are defined earlier in this script.
data, manifold = swissroll.load(
    n_features=n_features,
    n_samples=n_samples,
    n_turns=1.2,
    radius=1.,
    hole=False,
)
score_manifold_data = local_match(
    data, manifold, query_size=50, ratio=1, seed=0)
# print() with one pre-formatted string is valid and prints identical text
# under both Python 2 and Python 3 (the old `print x, y` statement form is
# a SyntaxError on Python 3)
print("kNN score match manifold/data: %s" % score_manifold_data)

# build model to extract the manifold and learn a mapping / encoder to be able
# to reproduce this on test data
embedder = SDAEmbedder((n_features, 10, 2),
                       noise=0.1,
                       reconstruction_penalty=1.0,
                       embedding_penalty=0.1,
                       sparsity_penalty=0.0,
                       learning_rate=0.1,
                       seed=0)

# use the randomly initialized encoder to measure the baseline
# NOTE(review): the message below says "after pre-training" but no training
# has happened yet — likely a copy-paste slip; left unchanged to preserve
# the script's output.
code = embedder.encode(data)
score_code_data = local_match(data, code, query_size=50, ratio=1, seed=0)
print("kNN score match after pre-training code/data: %s" % score_code_data)

# visualize how well pairwise distances are preserved by the random encoder
fig = pl.figure(1)
_, _, corr = pairwise_distances(data, code,
                                ax=fig.add_subplot(3, 1, 1),
                                title="random")
print("Pairwise distances correlation: %s" % corr)

print("Training encoder to unroll the embedded data...")
start = time.time()
embedder.pre_train(data, slice_=slice(None, None),
                   epochs=1000, batch_size=100)
# Ground-truth upper bound: how well input-space kNN neighborhoods match the
# known manifold coordinates.
# NOTE: data / stacked_manifolds / colors / n_features come from earlier in
# this script.
# print() with one pre-formatted string is valid and prints identical text
# under both Python 2 and Python 3 (the old `print x, y` statement form is
# a SyntaxError on Python 3).
score = local_match(data, stacked_manifolds, query_size=50, ratio=1, seed=0)
print("kNN score match manifolds/data (ground truth): %s" % score)

# compute the score of a projection
projection = random_project(data, target_dim=2, rng=np.random)
score = local_match(data, projection, query_size=50, ratio=1, seed=0)
print("kNN score match projection/data (baseline): %s" % score)

# reshuffle the data since stochastic gradient descent assumes I.I.D. samples
perm = np.random.permutation(data.shape[0])
data, colors = data[perm], colors[perm]

# build model to extract the manifolds and learn a mapping / encoder to be able
# to reproduce this on test data
embedder = SDAEmbedder((n_features, 30, 10, 2),
                       noise=0.1,
                       embedding_penalty=1.0,
                       reconstruction_penalty=0.1,
                       sparsity_penalty=0.0,
                       learning_rate=0.1,
                       seed=0)

# baseline score with the randomly initialized (untrained) encoder
random_code = embedder.encode(data)
score = local_match(data, random_code, query_size=50, ratio=1, seed=0)
print("kNN score match random code/data: %s" % score)

print("Training encoder to unroll the embedded data...")
start = time.time()
embedder.pre_train(data, epochs=500, batch_size=5)
print("done in %ds" % (time.time() - start))

code = embedder.encode(data)
score = local_match(data, code, query_size=50, ratio=1, seed=0)
print("kNN score match code/data: %s" % score)