-
Notifications
You must be signed in to change notification settings - Fork 0
/
autoencoder.py
159 lines (117 loc) · 5.41 KB
/
autoencoder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# coding: utf-8
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
import os
import sys
import numpy as np
import time
from utils import tile_raster_images
import PIL.Image as Image
from logistic_regression import load_data
class dA(object):
## theano_rngはTheanoの乱数発生器、n_visibleは入力データの次元、n_hiddenが隠れ層の次元
def __init__(self, numpy_rng, theano_rng=None, input=None, n_visible=784, n_hidden=500, W=None, bhid=None, bvis=None):
self.n_visible = n_visible
self.n_hidden = n_hidden
if not theano_rng:
theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
# Wの共有変数
if not W:
# Wの行列はn_visible * n_hiddenだが、minibatch単位の計算を行うため、input * W になるので n_visible * n_hidden でシェイプを定義
initial_W = np.asarray(numpy_rng.uniform(low=-4*np.sqrt(6./(n_hidden+n_visible)), high=4*np.sqrt(6./(n_hidden+n_visible)), size=(n_visible, n_hidden)), dtype=theano.config.floatX)
W = theano.shared(value=initial_W, name='W', borrow=True)
# 0で入力層のbiasを初期化した共有変数
if not bvis:
bvis = theano.shared(value=np.zeros(n_visible, dtype=theano.config.floatX), borrow=True)
# 0で隠れ層のbiasを初期化した共有変数
if not bhid:
bhid = theano.shared(value=np.zeros(n_hidden, dtype=theano.config.floatX), borrow=True)
# 順方向の重みW
self.W = W
# bは隠れ層のbias
self.b = bhid
# b_prime(bのチルダ)は入力層のbiasと同じ
self.b_prime = bvis
# 逆方向の重み
# tied weights, weght tying
self.W_prime = self.W.T
# rng
self.theano_rng = theano_rng
if input is None:
self.x = T.dmatrix(name='input')
else:
self.x = input
# 最適化パラメータ
self.params = [self.W, self.b, self.b_prime]
def get_corrupted_input(self, input, corruption_level):
""" ノイズ画像の生成(binomial) """
return self.theano_rng.binomial(size=input.shape, n=1, p=1-corruption_level, dtype=theano.config.floatX)*input
def get_hidden_values(self, input):
""" 入力層の値を隠れ層の値に変換 """
return T.nnet.sigmoid(T.dot(input, self.W) + self.b)
def get_reconstructed_input(self, hidden):
""" 隠れ層の値を入力層の値に変換 """
return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)
def get_cost_updates(self, corruption_level, learning_rate):
""" コストを計算してパラメータをアップデート """
# xにnoiseを付加する(denoise)
tilde_x = self.get_corrupted_input(self.x, corruption_level)
# 入力を変換
# denoise
y = self.get_hidden_values(tilde_x)
# normal
# y = self.get_hidden_values(self.x)
# 変換した値を逆変換で元に戻して再構成
z = self.get_reconstructed_input(y)
# 元の入力と再構成した入力との交差エントロピー
# xが0か1の2値を取る場合のエントロピーのはずだが、画像x
L = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
cost = T.mean(L)
# 誤差関数の微分
gparams = T.grad(cost, self.params)
# 更新式のシンボル
updates = [(param, param - learning_rate * gparam) for param, gparam in zip(self.params, gparams)]
return cost, updates
def test_autoencoder():
learning_rate = 0.1
training_epochs = 30
batch_size = 20
datasets = load_data('data/mnist.pkl.gz')
train_set_x = datasets[0][0]
# ミニバッチの数(教師データをbatch数で割るだけ)
n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
# ミニバッチのindexシンボル
index = T.lscalar()
# ミニバッチの学習データシンボル
x = T.matrix('x')
rng = np.random.RandomState(123)
theano_rng = RandomStreams(rng.randint(2 ** 30))
# autoencoder モデル
da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x, n_visible=28*28, n_hidden=500)
# コスト関数と更新式のシンボル
cost, updates = da.get_cost_updates(corruption_level=0.0, learning_rate=learning_rate)
# trainingの関数
train_da = theano.function([index], cost, updates=updates, givens={
x : train_set_x[index*batch_size : (index+1)*batch_size]
})
fp = open("log/ae_cost.txt", "w")
# training
start_time = time.clock()
for epoch in xrange(training_epochs):
c = []
for batch_index in xrange(n_train_batches):
c.append(train_da(batch_index))
print 'Training epoch %d, cost ' % epoch, np.mean(c)
fp.write('%d\t%f\n' % (epoch, np.mean(c)))
end_time = time.clock()
training_time = (end_time - start_time)
fp.close()
print "The no corruption code for file " + os.path.split(__file__)[1] + " ran for %.2fm" % ((training_time / 60.0))
image = Image.fromarray(tile_raster_images(
X=da.W.get_value(borrow=True).T,
img_shape=(28, 28), tile_shape=(10, 10),
tile_spacing=(1, 1)))
image.save('log/dae_filters_corruption_00.png')
if __name__ == "__main__":
test_autoencoder()