/
loaddata.py
205 lines (170 loc) · 6.77 KB
/
loaddata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
import scipy.io as sio
from defs import *
import matplotlib.pyplot as plt
from sklearn import preprocessing
from scipy import ndimage
import util
def load_data(valid_count, train_count, filename):
    """Load labelled face images and split them into validation and training sets.

    input:
        valid_count: maximum number of samples for the validation set
        train_count: maximum number of samples for the training set
        filename: path to the .mat file holding 'tr_labels', 'tr_images', 'tr_identity'
    output:
        valid_targets, valid_ids, valid_data, train_targets, train_ids, train_data
    """
    mat = sio.loadmat(filename)
    labels = mat['tr_labels']      # (#samples, 1) expression labels
    images = mat['tr_images'].T    # transposed so the sample axis comes first -- TODO confirm
    ids = mat['tr_identity']       # (#samples, 1) person identities
    # Group samples by person so that one individual never appears in both sets.
    persons = create_persons(labels, images, ids)
    valid_targets, valid_ids, valid_data, valid_keys = create_sub_set_rand(persons, valid_count)
    # Remove everyone already placed in the validation set before drawing training data.
    for used_key in valid_keys:
        del persons[used_key]
    train_targets, train_ids, train_data, _ = create_sub_set_rand(persons, train_count)
    return valid_targets, valid_ids, valid_data, train_targets, train_ids, train_data
"""
input:
file name to load the test image
output:
array of test data (h*w, #samples)
"""
def load_data_test(filename):
    """Load the public test images from *filename* and return them preprocessed.

    The .mat file is expected to contain 'public_test_images'.
    """
    mat = sio.loadmat(filename)
    images = mat['public_test_images'].T
    # The test set has no identities or labels, so fabricate sequential ids
    # and all-zero labels purely to satisfy create_persons' interface.
    ids = np.arange(images.shape[0]).reshape((images.shape[0], 1))
    labels = np.zeros(ids.shape)
    persons = create_persons(labels, images, ids)
    # Pull every sample straight back out in one deterministic pass.
    _, _, test_data, _ = create_sub_set(persons, labels.shape[0])
    return test_data
"""
input:
labels, ids: (#samples, 1) array of labeles and ids
data: (h, w, samples) array of data
output:
percentage error
"""
def create_persons(labels, images, ids):
    """Group the samples by person identity.

    input:
        labels, ids: (#samples, 1) arrays of expression labels and person ids
        images: array of image data, one image per sample along the first axis
    output:
        dict mapping person id -> Person accumulating that person's images and labels
    """
    persons = {}
    # range/`in` instead of the Python-2-only xrange/dict.has_key();
    # behaviour is identical on Python 2 and this also runs on Python 3.
    for i in range(labels.shape[0]):
        p_id = ids[i][0]
        if p_id not in persons:
            persons[p_id] = Person(p_id)
        persons[p_id].data.append(images[i])
        persons[p_id].expressions.append(labels[i][0])
    return persons
"""
Take the persons dictionary and takes out first "N" samples for the set
Also return a list of keys of the persons used for set
Note that the number of samples returned may be less than count, as the data about
the same person must stay in the same set. There is not splitting
input:
persons: dictionary of persons, by id
count: MAXIMUM number of samples in the returned set
output:
subset of data and keys used to extract the data
"""
def create_sub_set(persons, count):
    """Take up to ``count`` samples from ``persons``, whole persons only.

    Persons are consumed in dict-iteration order; the first person whose
    samples no longer fit ends the collection (a person is never split
    across sets).

    input:
        persons: dictionary of persons, by id
        count: MAXIMUM number of samples in the returned set
    output:
        targets (1, n), ids (1, n), preprocessed data (n, 32, 32),
        and the list of keys of the persons actually included
    """
    targets = np.zeros((count))
    ids = np.zeros((count))
    data = np.zeros((count, 32, 32))
    keys = []
    counter = 0
    for key in persons:
        person = persons[key]
        n = len(person.expressions)
        if counter + n > count:
            # This person does not fit in full; stop here. (The old code
            # partially copied then np.delete'd the person back out, and
            # when n == 0 samples had been copied it wrongly appended this
            # excluded person's key, losing the person from the pool.)
            break
        for i in range(n):
            targets[counter] = person.expressions[i]
            ids[counter] = person.id
            data[counter] = person.data[i]
            counter += 1
        keys.append(key)
    # Trim the unused zero tail in one slice. The old trimming loop
    # (`while counter < count - 1`) had an off-by-one that left a single
    # all-zero row when fewer than `count` samples were available.
    targets = targets[:counter]
    ids = ids[:counter]
    data = data[:counter]
    data = util.preprocess_image(data)
    return targets.reshape(1, counter), ids.reshape(1, counter), data, keys
"""
Take the persons dictionary and takes out random "N" samples for the set
Also return a list of keys of the persons used for set
Note that the number of samples returned may be less than count, as the data about
the same person must stay in the same set. There is not splitting
input:
persons: dictionary of persons, by id
count: MAXIMUM number of samples in the returned set
output:
subset of data and keys used to extract the data
"""
def create_sub_set_rand(persons, count):
    """Randomly draw up to ``count`` samples from ``persons``, whole persons only.

    Same contract as create_sub_set, but the persons are visited in a
    shuffled order, so repeated calls yield different subsets.

    input:
        persons: dictionary of persons, by id
        count: MAXIMUM number of samples in the returned set
    output:
        targets (1, n), ids (1, n), preprocessed data (n, 32, 32),
        and the list of keys of the persons actually included
    """
    # list() is required: on Python 3 dict.keys() is a view, which
    # np.random.shuffle cannot shuffle in place (it needs a mutable
    # sequence); on Python 2 this is a no-op copy.
    shuffle_keys = list(persons.keys())
    np.random.shuffle(shuffle_keys)
    targets = np.zeros((count))
    ids = np.zeros((count))
    data = np.zeros((count, 32, 32))
    keys = []
    counter = 0
    for key in shuffle_keys:
        person = persons[key]
        n = len(person.expressions)
        if counter + n > count:
            # This person does not fit in full; stop here. (The old code
            # partially copied then np.delete'd the person back out, and
            # when 0 samples had been copied it wrongly appended this
            # excluded person's key, losing the person from the pool.)
            break
        for i in range(n):
            targets[counter] = person.expressions[i]
            ids[counter] = person.id
            data[counter] = person.data[i]
            counter += 1
        keys.append(key)
    # Trim the unused zero tail in one slice. The old trimming loop
    # (`while counter < count - 1`) had an off-by-one that left a single
    # all-zero row when fewer than `count` samples were available.
    targets = targets[:counter]
    ids = ids[:counter]
    data = data[:counter]
    data = util.preprocess_image(data)
    return targets.reshape(1, counter), ids.reshape(1, counter), data, keys