# TransitionClassifier.py
import vigra
import matplotlib.pyplot as plt
from vigra import numpy as np
import h5py
from sklearn.neighbors import KDTree
from sklearn.cross_validation import KFold
from sklearn.metrics import precision_recall_fscore_support
from compiler.ast import flatten
import os
# Turn every numpy floating-point error category (divide, over, under, invalid)
# into a raised FloatingPointError instead of a warning, for the whole module.
np.seterr(all='raise')
def read_in_images(n1, n2, filepath, fileFormatString='{:05}.h5'):
    """Read the label images of the frames n1 .. n2-1.

    For every frame number the file name is built by formatting the number
    with `fileFormatString` and joining it onto `filepath`; the dataset
    'segmentation/labels' of that HDF5 file is then loaded with vigra.

    Returns a list holding one label image per frame, in ascending frame order.
    """
    frame_files = [
        os.path.join(str(filepath), fileFormatString.format(frame))
        for frame in range(n1, n2)
    ]
    return [
        vigra.impex.readHDF5(frame_file, 'segmentation/labels')
        for frame_file in frame_files
    ]
def compute_features(raw_image, labeled_image, n1, n2):
    """Compute the per-object features of n2-n1 consecutive frames.

    Parameters:
        raw_image: raw data, indexed as raw_image[..., frame, 0], i.e. the
            frame index is the second to last axis and the channel the last one.
        labeled_image: sequence with one label image per frame. A label image
            that has fewer axes than one raw frame gets a trailing channel axis
            appended (for that frame only; the sequence itself is not modified).
        n1, n2: first frame and one-past-last frame; only their difference is used.

    Returns:
        A list with one dict per frame mapping feature name -> feature values.
        If raw_image has fewer than 5 axes (presumably 2D+t data - TODO confirm),
        the convex hull and skeleton features are merged into the region
        features; otherwise only the region features are returned.
    """
    num_frames = n2 - n1
    # convex hull / skeleton features are only requested for raw data with < 5 axes
    with_shape_features = len(raw_image.shape) < 5
    allFeat = [0] * num_frames
    for i in range(num_frames):
        labels = labeled_image[i]
        if len(labels.shape) < len(raw_image.shape) - 1:
            # this was probably a missing channel axis, thus adding one at the end.
            # Only the current frame is expanded (as a plain ndarray), so frames of
            # different shapes are handled and the caller's list is left alone.
            labels = np.expand_dims(np.asarray(labels), axis=-1)
        label_plane = labels[..., 0]

        region_features = vigra.analysis.extractRegionFeatures(
            raw_image[..., i, 0].astype('float32'), label_plane, ignoreLabel=0)
        merged = dict(region_features.items())

        if with_shape_features:
            squeezed_labels = label_plane.squeeze().astype(np.uint32)
            convex_hull = vigra.analysis.extractConvexHullFeatures(squeezed_labels, ignoreLabel=0)
            skeleton = vigra.analysis.extractSkeletonFeatures(squeezed_labels)
            # later updates win on duplicate names, like the former list concatenation
            merged.update(convex_hull.items())
            merged.update(skeleton.items())

        allFeat[i] = merged
    return allFeat
def getFeatures(f1, f2, o1, o2):
    """Build the joint feature vector of object o1 (in f1) and object o2 (in f2).

    Parameters:
        f1, f2: mappings feature name -> per-object feature values; the values
            of object `o` are taken as f[key][o].
        o1, o2: object indices into f1 and f2 respectively.

    Returns:
        A 1-D numpy array: all feature differences (f1 - f2) followed by all
        feature products (f1 * f2), both flattened. For 'RegionCenter' the
        norm of the difference / product is used instead of the vector. The
        features "Global<Maximum >", "Global<Minimum >", 'Histogram' and
        'Polygon' are skipped.
    """
    # global min/max intensity is not interesting, Histogram contains only zeros,
    # Polygon has a different length for every object
    skipped = ("Global<Maximum >", "Global<Minimum >", 'Histogram', 'Polygon')
    differences = []
    products = []
    for key in f1:
        if key in skipped:
            continue
        first = f1[key][o1]
        second = f2[key][o2]
        if key == 'RegionCenter':
            differences.append(np.linalg.norm(first - second))
            products.append(np.linalg.norm(first * second))
        else:
            # flatten with numpy (row-major) instead of compiler.ast.flatten,
            # which only exists on Python 2
            differences.extend(np.ravel(first - second).tolist())
            products.extend(np.ravel(first * second).tolist())
    return np.concatenate((np.asarray(differences), np.asarray(products)))
def read_positiveLabels(n1, n2, filepath, fileFormatString='{:05}.h5'):
    """Read the 'tracking/Moves' dataset of the frames n1+1 .. n2-1.

    Frame n1 is skipped because the first frame contains no moves data. File
    names are built by formatting the frame number with `fileFormatString`
    and joining it onto `filepath`.

    Returns a list with one entry per frame read, in ascending frame order
    (so entry 0 belongs to frame n1+1).
    """
    moves_files = [
        os.path.join(str(filepath), fileFormatString.format(frame))
        for frame in range(n1 + 1, n2)
    ]
    return [
        vigra.impex.readHDF5(moves_file, 'tracking/Moves')
        for moves_file in moves_files
    ]
def negativeLabels(features, positiveLabels, numNeighbors=3):
    """Compute negative transition examples by nearest-neighbor search.

    For every frame i >= 1, each object of frame i-1 is paired with its
    nearest objects in frame i (euclidean distance of 'RegionCenter'). A pair
    that is not listed in positiveLabels[i-1] becomes a negative example.

    Parameters:
        features: list with one feature dict per frame; only 'RegionCenter' is
            used. Row 0 is dropped before the search, so object ids start at 1.
        positiveLabels: positiveLabels[i-1] holds the annotated pairs
            [id in frame i-1, id in frame i] for the transition into frame i.
        numNeighbors: number of nearest neighbors considered per object. It is
            capped at the number of objects in frame i.

    Returns:
        A list with one entry per frame transition, each a list of
        [id in frame i-1, id in frame i] pairs. A transition where either
        frame has no objects yields an empty list.
    """
    neg_lab = []
    for i in range(1, len(features)):  # for all frames but the first
        previous_centers = features[i - 1]['RegionCenter'][1:, ...]
        current_centers = features[i]['RegionCenter'][1:, ...]

        # KDTree.query fails if k exceeds the number of points in the tree,
        # and an empty frame cannot be searched at all
        k = min(numNeighbors, len(current_centers))
        if k < 1 or len(previous_centers) == 0:
            neg_lab.append([])
            continue

        kdt = KDTree(current_centers, metric='euclidean')
        neighb = kdt.query(previous_centers, k=k, return_distance=False)

        # convert the annotations once per frame; a set makes the lookup O(1)
        annotated = np.asarray(positiveLabels[i - 1])
        if annotated.ndim == 2:
            positive_pairs = set(map(tuple, annotated.tolist()))
        else:
            positive_pairs = set()  # no pair-shaped rows, nothing can match

        frameNegLab = []
        for j in range(neighb.shape[0]):  # for all objects in frame i-1
            for m in range(neighb.shape[1]):  # for all neighbors
                # add one because the first row was removed when creating the KD tree
                pair = [j + 1, neighb[j][m] + 1]
                if tuple(pair) not in positive_pairs:
                    frameNegLab.append(pair)
        neg_lab.append(frameNegLab)
    return neg_lab
def find_features_without_NaNs(features):
    """
    Select the feature names that are usable in every frame.

    Starting from the feature names of the first frame, a name is dropped if
    its value contains a NaN or an infinity in any frame (list-valued
    features are not inspected), or if it is one of "Global<Maximum >",
    "Global<Minimum >", 'Histogram', 'Polygon'.

    Parameters:
        features: list with one dict (feature name -> values) per frame.

    Returns:
        A list of the remaining feature names, in the key order of the first
        frame. An empty `features` list yields an empty list.
    """
    if not features:
        return []

    forbidden = ["Global<Maximum >", "Global<Minimum >", 'Histogram', 'Polygon']
    rejected = set(forbidden)
    for featuresPerFrame in features:
        for key, value in featuresPerFrame.items():
            if key in rejected or isinstance(value, list):
                continue
            # not finite means NaN or +-inf somewhere in the values
            if not np.all(np.isfinite(value)):
                rejected.add(key)

    return [key for key in features[0].keys() if key not in rejected]
class TransitionClassifier:
    """Random forest that classifies pairs of objects as transition / no transition.

    Samples are collected with addSample (or set in bulk with add_allData),
    the forest is trained with train() and can be stored with writeRF().
    """

    # features that never enter the sample vector: the global min/max intensity
    # is not interesting, Histogram contains only zeros, Polygon has a different
    # length for every object
    _IGNORED_FEATURES = ("Global<Maximum >", "Global<Minimum >", 'Histogram', 'Polygon')

    def __init__(self, selectedFeatures):
        """Create an untrained classifier that uses the given feature names."""
        self.rf = vigra.learning.RandomForest()
        # sample matrix: 1-D after the first sample, one row per sample afterwards
        self.mydata = None
        self.labels = []
        self.selectedFeatures = selectedFeatures

    def addSample(self, f1, f2, label):
        """Append one training sample built from the features of two objects.

        Parameters:
            f1, f2: mappings feature name -> value of a single object; values
                are numpy arrays or scalars.
            label: class label of the sample (the script uses 1 for positive
                and 0 for negative examples).

        The sample vector consists of all feature differences (f1 - f2)
        followed by all feature products (f1 * f2) over self.selectedFeatures;
        for 'RegionCenter' the norm of the difference / product is used.

        Raises:
            AssertionError: if the sample vector contains NaN or infinity. In
                that case neither the label nor the data is stored.
        """
        differences = []
        products = []
        # use the instance's feature selection, not a module-level global
        for key in self.selectedFeatures:
            if key in self._IGNORED_FEATURES:
                continue
            if key == 'RegionCenter':
                differences.append(np.linalg.norm(f1[key] - f2[key]))
                products.append(np.linalg.norm(f1[key] * f2[key]))
            elif not isinstance(f1[key], np.ndarray):
                differences.append(float(f1[key]) - float(f2[key]))
                products.append(float(f1[key]) * float(f2[key]))
            else:
                # row-major flattening of (possibly multi-dimensional) features
                differences.extend(np.ravel(f1[key] - f2[key]).tolist())
                products.extend(np.ravel(f1[key] * f2[key]).tolist())

        x = np.asarray(differences)
        x2 = np.asarray(products)
        assert(np.any(np.isnan(x)) == False)
        assert(np.any(np.isnan(x2)) == False)
        assert(np.any(np.isinf(x)) == False)
        assert(np.any(np.isinf(x2)) == False)

        features = np.concatenate((x, x2))
        # store the label only once the sample is known to be valid, so that
        # labels and data always have the same number of entries
        self.labels.append(label)
        if self.mydata is None:
            self.mydata = features
        else:
            self.mydata = np.vstack((self.mydata, features))

    def add_allData(self, mydata, labels):
        """Replace all collected samples and labels with the given ones."""
        self.mydata = mydata
        self.labels = labels

    def train(self):
        """Train the random forest on the collected samples and print the OOB error."""
        label_array = np.asarray(self.labels)
        num_positive = np.count_nonzero(label_array)
        print("Training classifier from {} positive and {} negative labels".format(num_positive,
                                                                                  len(self.labels) - num_positive))
        oob = self.rf.learnRF(self.mydata.astype("float32"), label_array.astype("uint32").reshape(-1, 1))
        print("RF trained with OOB Error ", oob)

    def predictSample(self, test_data):
        """Return the labels predicted by the forest for test_data."""
        return self.rf.predictLabels(test_data.astype('float32'))

    def predictProbabilities(self, test_data):
        """Return the class probabilities predicted by the forest for test_data."""
        return self.rf.predictProbabilities(test_data.astype('float32'))

    def predictLabels(self, test_data, threshold=0.5):
        """Threshold the probability of class 1 into 0/1 labels.

        A row is labelled 1 if its probability in column 1 is >= threshold and
        0 otherwise. The result has the shape of the probability matrix with
        its first column removed.
        """
        prob = self.rf.predictProbabilities(test_data.astype('float32'))
        res = np.copy(prob)
        if len(prob):
            # write the 0/1 decision of every sample into its whole row
            res[:] = (prob[:, 1] >= threshold)[:, np.newaxis]
        return np.delete(res, 0, 1)

    def writeRF(self, outputFilename):
        """Write the forest and the names of the selected features to an HDF5 file."""
        self.rf.writeHDF5(outputFilename, pathInFile='/ClassifierForests/Forest0000')

        # write selected features: one (empty) group per feature name
        with h5py.File(outputFilename, 'r+') as f:
            featureNamesH5 = f.create_group('SelectedFeatures')
            featureNamesH5 = featureNamesH5.create_group('Standard Object Features')
            for feature in self.selectedFeatures:
                featureNamesH5.create_group(feature)
if __name__ == '__main__':
    # Script entry point: read ground truth and raw data, build positive and
    # negative transition samples, train the random forest and write it to disk.
    import argparse

    parser = argparse.ArgumentParser(description="trainRF",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("filepath",
                        help="read ground truth from this folder", metavar="FILE")
    parser.add_argument("rawimage_filename",
                        help="filepath+name of the raw image", metavar="FILE")
    parser.add_argument("--rawimage-h5-path", dest='rawimage_h5_path', type=str,
                        help="Path inside the rawimage HDF5 file", default='volume/data')
    parser.add_argument("initFrame", default=0, type=int,
                        help="where to begin reading the frames")
    parser.add_argument("endFrame", default=0, type=int,
                        help="where to end frames")
    parser.add_argument("outputFilename",
                        help="save RF into file", metavar="FILE")
    parser.add_argument("--filename-zero-padding", dest='filename_zero_padding', default=5, type=int,
                        help="Number of digits each file name should be long")
    parser.add_argument("--time-axis-index", dest='time_axis_index', default=2, type=int,
                        help="Zero-based index of the time axis in your raw data. E.g. if it has shape (x,t,y,c) this value is 1. Set to -1 to disable any changes")
    args = parser.parse_args()

    filepath = args.filepath
    rawimage_filename = args.rawimage_filename
    initFrame = args.initFrame
    endFrame = args.endFrame
    # e.g. a padding of 5 gives the format string '{:05}.h5'
    fileFormatString = '{'+':0{}'.format(args.filename_zero_padding)+'}.h5'

    rawimage = vigra.impex.readHDF5(rawimage_filename, args.rawimage_h5_path)
    try:
        print(rawimage.axistags)
    except AttributeError:
        # the loaded array carries no axistags; printing them is informational only
        pass

    # transform such that the order is the following: X,Y,(Z),T, C
    # (the time axis is rolled to the position just before the last axis)
    if args.time_axis_index != -1:
        rawimage = np.rollaxis(rawimage, args.time_axis_index, -1)

    features = compute_features(rawimage, read_in_images(initFrame, endFrame, filepath, fileFormatString), initFrame, endFrame)
    selectedFeatures = find_features_without_NaNs(features)
    mylabels = read_positiveLabels(initFrame, endFrame, filepath, fileFormatString)
    neg_labels = negativeLabels(features, mylabels)
    TC = TransitionClassifier(selectedFeatures)

    # compute featuresA for each object A from the feature matrix from Vigra
    def compute_ObjFeatures(features, obj):
        """Return a dict feature name -> value of the single object `obj`."""
        objFeatures = {}
        for key in features:
            if key == "Global<Maximum >" or key == "Global<Minimum >":  # these have only one element
                objFeatures[key] = features[key]
            else:
                objFeatures[key] = features[key][obj]
        return objFeatures

    # every transition k -> k+1 contributes its annotated moves as positive
    # samples and the nearest-neighbor pairs as negative samples
    for k in range(0, len(features)-1):
        for i in mylabels[k]:
            TC.addSample(compute_ObjFeatures(features[k], i[0]), compute_ObjFeatures(features[k+1], i[1]), 1)  # positive
        for i in neg_labels[k]:
            TC.addSample(compute_ObjFeatures(features[k], i[0]), compute_ObjFeatures(features[k+1], i[1]), 0)  # negative
    TC.train()

    # delete file before writing
    if os.path.exists(args.outputFilename):
        os.remove(args.outputFilename)
    TC.writeRF(args.outputFilename)  # writes learned RF to disk