/
minimum_wiring.py
322 lines (276 loc) · 11.4 KB
/
minimum_wiring.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
import os
import theano
import scipy
import numpy as np
import pylab as pl
import cPickle as pickle
import theano.tensor as t
import sparse_filtering as sf
import utilities.init as init
from utilities.BP import backprop
from scipy.io import loadmat, savemat
from utilities.connections import distMat
from utilities.visualize import drawplots
# switches
# `convolutional` selects which pipeline the rest of the script runs:
#   'n' -> load "patches.mat" and train a sparse filtering second layer
#   'y' -> load the STL-10 train/test files plus labels and train a softmax read-out layer
# Every branch below is an if/elif on these two values with no else, so any
# other value skips the branch bodies entirely.
convolutional = 'n'
def rectify(X):
    """Return the element-wise maximum of X and zero (negative entries become 0)."""
    floor = 0.
    return t.maximum(X, floor)
def softmax(X):
    """Softmax of X taken along axis 1.

    The maximum along axis 1 is subtracted before exponentiating; this does
    not change the mathematical result but keeps every exponent argument
    non-positive. Both reductions are re-broadcast against X with
    dimshuffle(0, 'x').
    """
    row_max = X.max(axis=1).dimshuffle(0, 'x')
    exponentials = t.exp(X - row_max)
    row_total = exponentials.sum(axis=1).dimshuffle(0, 'x')
    return exponentials / row_total
def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    """Build the list of RMSprop update pairs for `params`.

    cost:    symbolic scalar that is differentiated with respect to `params`.
    params:  shared variables to optimise (each must support get_value()).
    lr:      step size applied to the scaled gradient.
    rho:     decay factor of the running average of squared gradients.
    epsilon: constant added under the square root.

    Returns a list of (shared_variable, new_expression) tuples; for every
    parameter the accumulator update comes first, then the parameter update.
    """
    gradients = t.grad(cost=cost, wrt=params)
    updates = []
    for param, grad in zip(params, gradients):
        # accumulator starts as zeros with the same shape as the parameter
        mean_square = theano.shared(param.get_value() * 0.)
        mean_square_new = rho * mean_square + (1 - rho) * grad ** 2
        scaled_grad = grad / t.sqrt(mean_square_new + epsilon)
        updates.extend([
            (mean_square, mean_square_new),
            (param, param - lr * scaled_grad),
        ])
    return updates
def final_layer(X, w_out):
    """Return softmax(X . w_out): a linear projection followed by softmax."""
    projected = t.dot(X, w_out)
    return softmax(projected)
# def distMat(neurons):
#
# # define dimension of cortical sheet
# dim = np.sqrt(neurons)
#
# # create coordinates
# coordinates = []
# for i in range(int(dim)):
# for j in range(int(dim)):
# coordinates.append([i, j])
#
# # get distance from center position
# center = [np.ceil(dim / 2), np.ceil(dim / 2)]
# distances = scipy.spatial.distance.cdist(coordinates, np.atleast_2d(center)).reshape((dim, dim))
#
# # roll it to first position
# back = int(np.floor(dim / 2))
# distances = np.roll(distances, -back, axis=0)
# distances = np.roll(distances, -back, axis=1)
#
# return distances
# load in the network(s; topological and non-topological) into a dictionary
print("loading model(s)...")
models = {}
model_folders = ['SF', 'tSF']
model_names = ['ConvolutionalSF_model', 'TopologicalConvolutionalSF_model']
base_path = os.path.dirname(__file__)
# model_names[i] is stored under <base_path>/saved/<model_folders[i]>/model.pkl
for model_name, model_folder in zip(model_names, model_folders):
    file_path = os.path.join(base_path, "saved", model_folder, 'model.pkl')
    # NOTE(security): unpickling can execute arbitrary code; only load model
    # files that were produced locally / come from a trusted source.
    # The context manager closes the file handle once the model is loaded.
    with open(file_path, 'rb') as model_file:
        models[model_name] = pickle.load(model_file)
# load in the training and testing data (should be preprocessed)
# Every file is read from <base_path>/data, the array is taken from the 'X'
# key of the .mat file and cast to float32.
print("loading data...")
if convolutional == 'n':
    # single patch file; the array is transposed before the cast
    file_name = "patches.mat"
    file_path = os.path.join(base_path, "data", file_name)
    data = np.float32(loadmat(file_path)['X'].T)
elif convolutional == 'y':
    # separate STL-10 train / test files, used without transposing
    train_file_name, test_file_name = "STL_10_lcn_train.mat", "STL_10_lcn_test.mat"
    train_file_path = os.path.join(base_path, "data", train_file_name)
    test_file_path = os.path.join(base_path, "data", test_file_name)
    train_data = loadmat(train_file_path)['X']
    test_data = loadmat(test_file_path)['X']
    train_data, test_data = np.float32(train_data), np.float32(test_data)
# load in the corresponding labels
# Only the 'y' pipeline uses labels; they are read from the 'y' key of
# <base_path>/data/train.mat and <base_path>/data/test.mat.
if convolutional == 'y':
    print("loading labels...")
    train_labels_file, test_labels_file = "train.mat", "test.mat"
    train_labels_path = os.path.join(base_path, "data", train_labels_file)
    test_labels_path = os.path.join(base_path, "data", test_labels_file)
    train_labels = loadmat(train_labels_path)['y']
    test_labels = loadmat(test_labels_path)['y']
# compile functions and grab test
# training_functions() yields three items per model; which one is kept as the
# "test" functions depends on the pipeline.
print("compiling theano functions...")
test = {}
for model in model_names:
    network = models[model]
    if convolutional == 'n':
        # keep the second item: using output as test
        _, test[model], _ = network.training_functions(data)
    elif convolutional == 'y':
        # keep the third item
        _, _, test[model] = network.training_functions(train_data)
# get the output activations of the last layer in the network(s) for next layer / training and test data
if convolutional == 'n':
    print("getting output of (both) model(s) for training second layer...")
    out = {}
    for model in model_names:
        out[model] = test[model][models[model].n_layers - 1](data)
elif convolutional == 'y':
    print("getting output of (both) model(s) for train and test data...")
    train_out = {}
    test_out = {}
    for model in model_names:
        last_layer = test[model][models[model].n_layers - 1]
        # flatten axes 1..3 so every example becomes one feature vector
        temp = last_layer(train_data)
        train_out[model] = temp.reshape(temp.shape[0], temp.shape[1] * temp.shape[2] * temp.shape[3])
        # the test features are needed for the accuracy computed during training below
        # NOTE(review): assumes the compiled function accepts test_data the same
        # way it accepts train_data -- confirm against sparse_filtering.Network
        temp = last_layer(test_data)
        test_out[model] = temp.reshape(temp.shape[0], temp.shape[1] * temp.shape[2] * temp.shape[3])

# train a fully connected network on the output from the training data test the classification accuracy of the final
# layer based on the outputs for the test data (optional)
# OR
# train a fully connected sparse filtering network on the second layer
final_weights = {}
weights = None
for model in model_names:

    if convolutional == 'n':

        # construct the network
        print("building model...")
        model_ = sf.Network(
            model_type=['SparseFilter'],
            weight_dims=([100, 625],),
            p=None,
            group_size=None,
            step=None,
            lr=0.001,
            opt='GD',
            c=convolutional,
            test='n',
            batch_size=50000,
            random='n',
            weights=None
        )

        # normalize the training data (subtract the mean taken along axis 1);
        # note that this rebinds the module-level `data`
        data = out[model][0].T
        data = data - np.tile(data.mean(axis=1), (625, 1)).T

        # compile the training, output, and test functions for the network
        print("compiling theano functions...")
        train, _, _ = model_.training_functions(data)

        # train the sparse filtering network
        print("training network...")
        for epoch in xrange(200):  # 100
            cost, weights = train[0](index=0)
            print("Layer %i cost at epoch %i and batch %i: %f" % (1, epoch, 0, cost))

    elif convolutional == 'y':

        print("setting up network for " + model)
        # the features were flattened to 2-D above, so the input is a matrix
        X = t.fmatrix()
        Y = t.fmatrix()
        w_out = init.init_weights((train_out[model].shape[1], 10))
        py_x = final_layer(X, w_out)
        y_x = t.argmax(py_x, axis=1)
        cost = t.mean(t.nnet.categorical_crossentropy(py_x, Y))
        params = [w_out]
        updates = RMSprop(cost, params, lr=0.001)

        print("compiling theano functions...")
        train = theano.function(inputs=[X, Y], outputs=[cost, w_out], updates=updates, allow_input_downcast=True)
        predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)

        print("training fully connected layer...")
        max_iter = 100
        batch_size = 1000
        # number of full batches; a trailing partial batch is not used
        n_batches = train_out[model].shape[0] // batch_size
        for iteration in range(max_iter):
            for batch in xrange(n_batches):
                batch_begin = batch * batch_size
                batch_end = batch_begin + batch_size
                # NOTE(review): assumes the rows of train_labels line up with
                # the rows of train_out[model] -- confirm against the data files
                batch_cost, weights = train(
                    train_out[model][batch_begin:batch_end],
                    train_labels[batch_begin:batch_end]
                )
                print("Cost at iteration %d and batch %d for model " % (iteration, batch) + model +
                      ": %f" % batch_cost)

            # labels are compared through argmax along axis 1 against the predicted class index
            accuracy = float(np.mean(np.argmax(test_labels, axis=1) == predict(test_out[model])))
            print("Classification performance for model " + model + " at iteration %d: %f" % (
                iteration,
                accuracy
            ))

    # save weights
    final_weights[model] = weights
# visualize the weights for each class / neuron across the cortical sheet
print("visualizing weights...")
if convolutional == 'n':
    for model in model_names:
        drawplots(final_weights[model].T, color='gray', convolution='n', pad=0, examples=None, channels=1)
elif convolutional == 'y':
    for model in model_names:
        # one panel per label column, laid out on a 2 x 5 grid
        for category in xrange(train_labels.shape[1]):
            w = final_weights[model][:, category]
            # reshape needs integer dimensions; np.sqrt returns a float
            side = int(round(np.sqrt(w.shape[0])))
            w = w.reshape(side, side)
            pl.subplot(2, 5, category + 1)
            pl.imshow(w)
        # NOTE(review): nesting reconstructed from a copy without indentation --
        # title/show are assumed to run once per model, after all panels are drawn
        pl.title("Weight distributions for model " + model)
        pl.show()
# find optimal neuronal positions (for N random initial positions)
# For every unit, the absolute weights are combined with the distance matrix
# returned by distMat; the smallest resulting value and its index are stored.
print("finding optimal neuronal positions...")
optimal_positions = {}
minimal_wiring_length = {}
# initial_distance = distMat(weights.shape[0])
for model in model_names:

    entity = None
    optimal_positions[model] = []
    minimal_wiring_length[model] = []

    # number of units to evaluate: rows of the weight matrix ('n') or label columns ('y')
    if convolutional == 'n':
        entity = final_weights[model].shape[0]
    elif convolutional == 'y':
        entity = train_labels.shape[1]

    for neuron in xrange(entity):

        if convolutional == 'y':
            # in the 'y' pipeline the weight matrix holds one column per
            # category, so the unit's weights are a column
            weights = final_weights[model][:, neuron]
        else:
            weights = final_weights[model][neuron, :]

        distances = distMat(len(weights), d=None, kind='euclidean', inverted='n')
        # NOTE(review): if distMat returns a symmetric matrix the transpose has no effect
        wiring_lengths = np.dot(np.abs(weights), distances.T)  # should this be transposed?

        minimum_wiring = np.min(wiring_lengths)
        XY = np.argmin(wiring_lengths)  # flat index of the minimum, not a coordinate pair

        optimal_positions[model].append(XY)  # [X, Y] # todo: convert to coordinates
        minimal_wiring_length[model].append(minimum_wiring)
# w = t.fvector()
# d_mat = t.fmatrix()
# # d_mat = t.fvector()
#
# x = theano.shared(np.asarray(np.floor(np.sqrt(final_weights[model].shape[0]) / 2), dtype=theano.config.floatX))
# y = theano.shared(np.asarray(np.floor(np.sqrt(final_weights[model].shape[0]) / 2), dtype=theano.config.floatX))
#
# d = t.roll(d_mat, x, axis=0) # todo: figure out how to convert to int while still being differentiable
# d = t.roll(d, y, axis=1)
#
# # distances = distMat(x, y)
#
# cost = t.dot(d.flatten(), w)
#
# parameters = [x, y]
# updates = RMSprop(cost, parameters)
#
# train = theano.function(inputs=[w, d_mat], outputs=[x, y, cost], updates=updates)
#
# max_iter = 100
# # X = int(np.floor(np.sqrt(w.shape[0]) / 2))
# # Y = int(np.floor(np.sqrt(w.shape[0]) / 2))
# weights = final_weights[model][:, neuron]
# for iteration in xrange(max_iter):
#
# # distances = np.roll(initial_distance, X, axis=0)
# # distances = np.roll(distances, Y, axis=1)
#
# X, Y, cost = train(weights, initial_distance)
# # X = np.round(X)
# # Y = np.round(Y)
#
# optimal_positions[model][neuron] = [X, Y]
# minimal_wiring_length[model][neuron] = cost
# compare minimal wiring lengths
# Store the mean per model and print it together with the standard deviation.
avg_wiring_length = {}
for model in model_names:
    lengths = minimal_wiring_length[model]
    avg_wiring_length[model] = np.mean(lengths)
    std = np.std(lengths)
    summary = "Wiring length for model %s: %0.4f +/- (%0.4f)" % (model, avg_wiring_length[model], std)
    print(summary)
# plot differences in bar graph
sf_name, tsf_name = model_names[0], model_names[1]
bar_positions = np.arange(len(model_names))
bar_heights = (avg_wiring_length[sf_name], avg_wiring_length[tsf_name])
pl.bar(bar_positions, bar_heights)
pl.show()

# second figure: overlaid histograms of the per-unit minimum wiring lengths,
# both divided by the largest value found across the two models
pl.figure(2)
bins = np.linspace(0, 1, 100)
pooled_lengths = np.concatenate((minimal_wiring_length[sf_name], minimal_wiring_length[tsf_name]))
divider = np.amax(pooled_lengths)
sf_mwl = minimal_wiring_length[sf_name] / divider
tsf_mwl = minimal_wiring_length[tsf_name] / divider
for scaled_lengths, legend_label in ((sf_mwl, 'Sparse Filtering'), (tsf_mwl, 'Topographic Sparse Filtering')):
    pl.hist(scaled_lengths, bins=bins, alpha=0.5, label=legend_label)
pl.legend(loc='upper right')
pl.xlabel('Wiring Length')
pl.ylabel('Frequency')
pl.title('Minimum Wiring Length')
pl.show()