-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
78 lines (67 loc) · 2.88 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# Author: MeiXing Dong
import autoencoder
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import mnist_loader
import numpy as np
import pickle
import sklearn.manifold
def main():
    """Train an autoencoder on the MNIST inputs, pickle it, and print its test error.

    Steps, in order:
      1. Load the datasets through ``mnist_loader.load_data_wrapper()``.
      2. Keep only the first element of every (input, target) pair.
      3. Build ``autoencoder.Autoencoder(input_size, 100)`` and train it with
         ``sgd``, passing the test inputs as the final argument.
      4. Pickle the trained object to ``trained_autoencoder.pkl`` in the
         current working directory.
      5. Print the value returned by ``auto_enc.test(test_input)``.

    Returns:
        None. Results are reported on stdout and through the pickle file.
    """
    # The validation split is returned by the loader but not used here.
    training_data, _validation_data, test_data = mnist_loader.load_data_wrapper()

    # Each dataset entry is indexed as a pair; only the input half is needed
    # because the autoencoder is trained to reproduce its own input.
    training_input = [pair[0] for pair in training_data]
    test_input = [pair[0] for pair in test_data]
    print(type(training_input[0]))

    # Make autoencoder network.
    input_size = len(training_input[0])
    hidden_size = 100
    auto_enc = autoencoder.Autoencoder(input_size, hidden_size)
    # NOTE(review): the positional arguments 5, 100, 3.0 are presumably
    # epochs, mini-batch size and learning rate -- confirm against
    # autoencoder.Autoencoder.sgd.
    auto_enc.sgd(training_input, 5, 100, 3.0, test_input)

    # Save the trained autoencoder to file. Pickle data is binary, so the file
    # is opened in "wb"; the context manager guarantees the handle is flushed
    # and closed even if pickling raises.
    with open("trained_autoencoder.pkl", "wb") as outfile:
        pickle.dump(auto_enc, outfile)

    mse_error = auto_enc.test(test_input)
    # Two spaces after the colon reproduce the output of the original
    # two-argument print statement.
    print("mse_error:  %s" % (mse_error,))
def visualize():
    """Show a 2-D t-SNE scatter plot of the MNIST test inputs, colored by digit.

    The pickled autoencoder is loaded from ``trained_autoencoder.pkl``, but the
    embedding that is plotted is currently computed from the raw test inputs:
    the step that encodes the inputs with the autoencoder is disabled (see the
    comment in the body).

    Returns:
        None. Progress is printed to stdout and a matplotlib window is shown.

    Raises:
        IOError: if ``trained_autoencoder.pkl`` cannot be opened.
    """
    # Pickle data is binary, so read it in "rb"; the context manager closes
    # the handle, which a bare open() never did.
    # NOTE(review): pickle.load can execute arbitrary code contained in the
    # file -- only load a file that was produced locally by main().
    with open("trained_autoencoder.pkl", "rb") as infile:
        auto_enc = pickle.load(infile)  # used only by the disabled step below

    _training_data, _validation_data, test_data = mnist_loader.load_data_wrapper()
    # Materialize once so the pairs can be walked twice even if the loader
    # hands back a one-shot iterator.
    test_pairs = list(test_data)
    test_input = [pair[0] for pair in test_pairs]
    # Get the y values.
    test_target = [pair[1] for pair in test_pairs]
    print(test_target[0])

    # Disabled step: encode the MNIST test set with the autoencoder instead of
    # feeding raw inputs to t-SNE. To re-enable, build the vectors below with
    #     auto_enc.feedforward(inp, embed=True).transpose()[0]
    # TODO: get rid of debugging, do all points not just 50

    # Do dimensionality reduction into 2 dimensions using t-sne.
    print("Performing dimensionality reduction using t-sne...")
    tsne = sklearn.manifold.TSNE()
    # Try just using tsne on the raw MNIST digits data.
    # transpose()[0] takes the first row of each transposed input; presumably
    # the inputs are column vectors, so this flattens them -- TODO confirm.
    raw_vecs = [inp.transpose()[0] for inp in test_input]
    reduced_vecs = np.asarray(tsne.fit_transform(raw_vecs))
    print(reduced_vecs[0])

    # One color per digit class; the list index is the digit.
    colors = ['r', 'b', 'g', 'c', 'm', 'k', 'y',
              (.2, .2, .2), (.4, 0, .5), (.8, .2, 0)]
    patches = [mpatches.Patch(color=color, label='%i' % digit)
               for digit, color in enumerate(colors)]
    plt.legend(handles=patches)

    # Draw each digit class with a single plot call instead of one call (and
    # one artist) per sample; only the stacking order of overlapping markers
    # differs from plotting point by point.
    targets = np.asarray(test_target)
    for digit, color in enumerate(colors):
        members = targets == digit
        plt.plot(reduced_vecs[members, 0], reduced_vecs[members, 1],
                 'o', color=color)
    plt.show()
if __name__ == "__main__":
    # Script entry point: training through main() is switched off, so running
    # this file only produces the t-SNE visualization.
    visualize()