/
Patel_assignment_06.py
290 lines (230 loc) · 9.24 KB
/
Patel_assignment_06.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
# Patel, Nabilahmed
# 1001-234-817
# 2016-12-04
# Assignment_06
import theano
from theano import tensor as T
from keras.models import Sequential,Model
from keras.layers import Input, Dense, Activation
from keras.models import model_from_json
import numpy as np
import scipy.misc
from scipy import linalg as la
import matplotlib
from matplotlib import pyplot as plt
import os
import sys
from os import listdir
from os.path import isfile, join
import h5py
def modelling(task_no, train_input, test_input, hidden_nodes, number_epoch):
    """Build (or load) a one-hidden-layer autoencoder and run one task.

    The network maps 784 -> hidden_nodes -> 784, and the input is used as
    its own target everywhere below (autoencoder training).

    Parameters
    ----------
    task_no : str
        One of '1'..'5'; selects which experiment to run.
    train_input, test_input : ndarray
        Row-per-image matrices of flattened 28x28 images (784 columns).
    hidden_nodes : int
        Number of units in the hidden layer.
    number_epoch : int
        Number of training epochs.

    Returns
    -------
    Task-dependent: a Keras History object ('1'); ``[train_mse, test_mse]``
    ('2'); the first-layer weight matrix ('3'); the network's output on
    ``test_input`` ('4'/'5'); or ``None`` for an unrecognised task number.
    """
    n_hidden = hidden_nodes
    nb_classes = 784          # output size = flattened 28x28 image
    optimizer = 'RMSprop'
    loss = 'mean_squared_error'
    metrics = ['accuracy']
    batch_size = 128
    nb_epoch = number_epoch
    if task_no in ['4', '5']:
        # Tasks 4/5 reuse the model trained and saved to disk by task 3.
        with open('model.json', 'r') as json_file:
            loaded_model_json = json_file.read()
        loaded_model = model_from_json(loaded_model_json)
        # load the trained weights into the reconstructed architecture
        loaded_model.load_weights("model.h5")
        print("Loaded model from disk")
        loaded_model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
    else:
        # Build a fresh autoencoder: 784 -> n_hidden (relu) -> 784 (linear).
        model = Sequential()
        model.add(Dense(n_hidden, input_shape=(784,), activation='relu'))
        model.add(Dense(nb_classes, activation='linear'))
        model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
    # In all tasks the target equals the input (autoencoder), so the same
    # array is passed both as x and as y.
    if task_no == '1':
        # Task 1: train and return the per-epoch loss history for plotting.
        history = model.fit(train_input, train_input, batch_size=batch_size,
                            nb_epoch=nb_epoch, verbose=2,
                            validation_data=(test_input, test_input),
                            shuffle=True)
        return history
    elif task_no == '2':
        # Task 2: train, then report the final train/test MSE.
        model.fit(train_input, train_input, batch_size=batch_size,
                  nb_epoch=nb_epoch, verbose=2,
                  validation_data=(test_input, test_input), shuffle=True)
        train_score = model.evaluate(train_input, train_input, verbose=1)
        test_score = model.evaluate(test_input, test_input, verbose=1)
        return [train_score[0], test_score[0]]
    elif task_no == '3':
        # Task 3: train, persist the model, return hidden-layer weights.
        model.fit(train_input, train_input, batch_size=batch_size,
                  nb_epoch=nb_epoch, verbose=2, shuffle=True)
        # save the architecture to JSON and the weights to HDF5
        with open("model.json", "w") as json_file:
            json_file.write(model.to_json())
        model.save_weights("model.h5")
        print("Saved model to disk")
        weights, biases = model.layers[0].get_weights()
        return weights
    elif task_no in ['4', '5']:
        # Tasks 4/5: run the previously saved network on the test set.
        output = loaded_model.predict(test_input)
        return output
def display_loss(epoch, xlabel, ylabel, train_score, test_score):
    """Plot the training and validation loss curves on a single figure."""
    figure = plt.figure('Loss')
    axis = figure.add_subplot(111)
    axis.plot(epoch, train_score, 'b-', label='train')
    axis.plot(epoch, test_score, 'g-', label='validation')
    axis.set_xlabel(xlabel)
    axis.set_ylabel(ylabel)
    plt.legend()
    plt.show()
def display_weights(weights):
    """Show the first 100 hidden-unit weight vectors as 28x28 gray images."""
    fig, axes = plt.subplots(10, 10, figsize=(12, 12))
    fig.suptitle("Weights", fontsize=16)
    for row in range(10):
        for column in range(10):
            cell = axes[row, column]
            # weight vectors are stored as columns of the weight matrix
            cell.imshow(weights[:, row * 10 + column].reshape(28, 28),
                        cmap=plt.cm.gray)
            cell.axis('off')  # hide tick marks/labels
    plt.show()
def display_images(_input, output):
    """Show the first 100 input images and their reconstructions.

    Two 10x10 figures are produced: one titled "Input" for ``_input`` and
    one titled "Output" for ``output``; images are stored one per row.
    """
    fig_in, axes_in = plt.subplots(10, 10, figsize=(12, 12))
    fig_in.suptitle("Input", fontsize=16)
    fig_out, axes_out = plt.subplots(10, 10, figsize=(12, 12))
    fig_out.suptitle("Output", fontsize=16)
    for idx in range(100):
        r, c = divmod(idx, 10)
        for axes, data in ((axes_in, _input), (axes_out, output)):
            axes[r, c].imshow(data[idx, :].reshape(28, 28), cmap=plt.cm.gray)
            axes[r, c].axis('off')  # hide tick marks/labels
    plt.show()
def display_eign_vecs(_input, output):
    """Show the first 100 eigenvectors of the inputs and of the outputs.

    Unlike display_images, eigenvectors are stored as COLUMNS of the
    matrices, hence the column indexing below.
    """
    fig_in, axes_in = plt.subplots(10, 10, figsize=(12, 12))
    fig_in.suptitle("First 100 Eigen vectors of inputs", fontsize=16)
    fig_out, axes_out = plt.subplots(10, 10, figsize=(12, 12))
    fig_out.suptitle("First 100 Eigen vectors of outputs", fontsize=16)
    for idx in range(100):
        r, c = divmod(idx, 10)
        for axes, data in ((axes_in, _input), (axes_out, output)):
            axes[r, c].imshow(data[:, idx].reshape(28, 28), cmap=plt.cm.gray)
            axes[r, c].axis('off')  # hide tick marks/labels
    plt.show()
def find_PCA(data):
    """Return the top-100 principal directions of ``data``.

    Parameters
    ----------
    data : ndarray, shape (n_samples, n_features)
        One observation per row. The array is NOT modified.

    Returns
    -------
    ndarray, shape (n_features, min(100, n_features))
        Eigenvectors of the covariance matrix (stored as columns), ordered
        by decreasing eigenvalue.
    """
    # Mean-center a COPY of the data. The original used `data -= ...`,
    # which silently mutated the caller's array (e.g. ds2_input, which is
    # reused across iterations of the driver loop).
    centered = data - data.mean(axis=0)
    # covariance matrix across features
    cov_mat = np.cov(centered, rowvar=False)
    # use 'eigh' rather than 'eig': cov_mat is symmetric, so the symmetric
    # solver is both correct and substantially faster
    eign_vals, eign_vecs = la.eigh(cov_mat)
    # order the eigenpairs by decreasing eigenvalue
    idx = np.argsort(eign_vals)[::-1]
    eign_vecs = eign_vecs[:, idx]
    eign_vals = eign_vals[idx]
    # keep only the first 100 eigenvectors
    eign_vecs = eign_vecs[:, :100]
    print(eign_vecs.shape)
    return eign_vecs
def reading_input(path):
    """Load every image file in ``path`` into one matrix, in random order.

    Each image is flattened to a row vector and scaled to [0, 1].

    Parameters
    ----------
    path : str
        Directory containing the image files.

    Returns
    -------
    ndarray, shape (n_images, n_pixels), dtype float
    """
    files = [f for f in os.listdir(path) if isfile(join(path, f))]
    # shuffle so the sample order is random on every call
    files = np.array(files)
    np.random.shuffle(files)
    files = files.tolist()
    rows = []
    for filename in files:
        # NOTE(review): scipy.misc.imread was removed in SciPy >= 1.2;
        # newer environments need imageio.imread (or plt.imread) instead.
        img = scipy.misc.imread(join(path, filename)).astype(np.float32)
        rows.append(img.reshape(1, -1))  # flatten to a row vector
    # Stack all rows once at the end. The original called np.concatenate
    # inside the loop, copying the accumulated array on every iteration
    # (quadratic in the number of files).
    P = np.concatenate(rows, axis=0)
    P = np.divide(P, 255)
    print(P.shape)
    return P
# ---------------------------------------------------------------------------
# Interactive driver: repeatedly asks which task (1-5) to run; any other
# answer exits. Datasets are loaded lazily and cached across iterations.
# Tasks 4 and 5 require the model saved by task 3 (gated via `weights`).
# ---------------------------------------------------------------------------
ds1_input = np.array([])   # 20k training set, loaded on first use
ds3_input = np.array([])   # 100-image set for task 4, loaded on first use
weights = []               # set by task 3; tasks 4/5 require it
while True:
    epoch = []
    number_of_nodes = []
    train_score = []
    test_score = []
    # NOTE(review): raw_input is Python 2; under Python 3 this must be input().
    task_no = raw_input('please input the task no(1 to 5) enter any other number to quit: ')
    # lazily read the training/testing data needed by the chosen task
    if task_no in ['1', '2', '3', '5'] and ds1_input.size == 0:
        ds1_input = reading_input("set1_20k/train/")
        ds2_input = reading_input("set2_2k/")
    elif task_no == '4' and ds3_input.size == 0:
        ds3_input = reading_input("set3_100/")
    if task_no == '1':
        # Task 1: loss curves over 50 epochs with 100 hidden nodes.
        history = modelling('1', ds1_input, ds2_input, 100, 50)
        epoch = [i for i in range(0, 50)]
        train_score = history.history['loss']
        test_score = history.history['val_loss']
        display_loss(epoch, "Number of epoch", "Mean Squared Error",
                     train_score, test_score)
    elif task_no == '2':
        # Task 2: final MSE vs. hidden-layer size. (The original repeated
        # the same three lines five times; this loop is equivalent.)
        number_of_nodes = [20, 40, 60, 80, 100]
        for nodes in number_of_nodes:
            score = modelling('2', ds1_input, ds2_input, nodes, 50)
            train_score.append(score[0])
            test_score.append(score[1])
        epoch = number_of_nodes
        display_loss(epoch, "Number of nodes in hidden layer",
                     "Mean Squared Error", train_score, test_score)
    elif task_no == '3':
        # Task 3: train for 100 epochs, save the model, show the weights.
        weights = modelling('3', ds1_input, ds2_input, 100, 100)
        display_weights(weights)
    elif task_no == '4':
        if len(weights) != 0:
            # Task 4: reconstruct the 100-image set with the saved model.
            output = modelling('4', ds1_input, ds3_input, 100, 100)
            display_images(ds3_input, output)
        else:
            print('first complete 3rd task!')
    elif task_no == '5':
        if len(weights) != 0:
            # Task 5: compare eigenvectors of inputs vs. reconstructions.
            output = modelling('5', ds1_input, ds2_input, 100, 100)
            ds2_PCA = find_PCA(ds2_input)
            output_PCA = find_PCA(output)
            display_eign_vecs(ds2_PCA, output_PCA)
        else:
            print('first complete 3rd task!')
    else:
        sys.exit()