/
RCNNObjectExtractor.py
452 lines (372 loc) · 17.9 KB
/
RCNNObjectExtractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
import numpy as np
import pandas as pd
import os
import sys
import argparse
from collections import defaultdict
import subprocess
import aux
import matplotlib
matplotlib.use('Agg')
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib import cm
from shapely.geometry import Polygon
import operator
import re
from nltk.corpus import wordnet as wn
import nltk
def main(arguments):
    """Entry point: build an extractor from parsed CLI arguments and run it."""
    RCNNObjectExtractor(arguments).process_hdf()
class RCNNObjectExtractor:
def __init__(self, args):
    """Store the parsed CLI arguments and load the optional label clusters.

    args: argparse.Namespace; reads args.clustersfile (path or None).
    self.clusters stays None when no clusters file was supplied.
    """
    self.args = args
    # Identity comparison against None instead of `==` (PEP 8).
    self.clusters = None if self.args.clustersfile is None else aux.load_clusters(self.args.clustersfile)
def process_hdf(self):
    """Load the RCNN proposal HDF file and dispatch to the extraction mode.

    Reads the per-proposal DataFrame from the HDF named by
    args.imagefilename (with any "-<digit>" suffix stripped), builds the
    detector label table from the ILSVRC synset words file, and then either
    runs the subject/object search (training mode) or the plain top-N
    extraction.
    """
    self.df = pd.read_hdf(re.sub("-[\d]","", self.args.imagefilename), 'df')
    # Hard-coded path to the ILSVRC-2012 detection synset list:
    # each line is "<synset_id> <name>[, alias...]"; we keep the first alias.
    with open('/export/scratch1/elliott/caffe/data/ilsvrc12/det_synset_words.txt') as f:
        self.labels_df = pd.DataFrame([
            {
                'synset_id': l.strip().split(' ')[0],
                'name': ' '.join(l.strip().split(' ')[1:]).split(',')[0]
            }
            for l in f.readlines()
        ])
    # NOTE(review): old pandas DataFrame.sort() returns a sorted copy; the
    # result is discarded here, so this line looks like a no-op — confirm.
    self.labels_df.sort('synset_id')
    # We have multiple annotated images in each file, so split the HDF into different outputs
    # with a dictionary that contains the following mapping:
    # image_cols[image_file_name] -> [indices of the rows in the HDF that correspond to it]
    self.image_cols = defaultdict(list)
    for idx,x in enumerate(self.df.index):
        self.image_cols[x].append(idx)
    # Map the raw HDF data into a object-label centric structure
    self.predictions_df = defaultdict(pd.DataFrame)
    for x in self.image_cols:
        self.predictions_df[x] = pd.DataFrame(np.vstack(self.df.prediction.values[self.image_cols[x][0:-1]]), columns=self.labels_df['name'])
    # NOTE(review): the per-image dict built just above is immediately
    # replaced by a single proposals-by-labels DataFrame covering every row,
    # which makes the preceding loop dead code — verify before removing.
    self.predictions_df = pd.DataFrame(np.vstack(self.df.prediction.values), columns=self.labels_df['name'])
    if self.args.verbose:
        print "Initialised RCNN Extractor with annotations for %s" % self.args.imagefilename
    # Training mode targets a specific subject/object pair in one image;
    # otherwise fall back to extracting the top-N detections.
    if self.args.training:
        return self.single_image(self.args.imagefilename, self.df, self.predictions_df, self.labels_df)
    else:
        self.extract_topn(self.df, self.predictions_df, self.labels_df)
def clustered_label(self, name):
    """Map a raw detector label to its annotator-defined cluster label.

    Falls back to the label itself when it belongs to no cluster.
    """
    return self.clusters[name] if name in self.clusters else name
'''
Determine whether an object with a particular label already exists in the
dictionary of detected objects. The detections dictionary is defined as:
label -> [data concerning that label]
'''
def already_detected(self, objectLabel, detections, useClusters=False):
    """Return True when `objectLabel` is already among the detected labels.

    detections maps label -> detection data; only its keys are inspected.
    With useClusters=True, a label also counts as detected when its cluster
    matches the cluster (or raw label) of any existing detection.
    """
    found = objectLabel in detections
    if useClusters:
        cluster = self.clustered_label(objectLabel)
        found = found or any(
            self.clustered_label(d) == cluster or d == cluster
            for d in detections
        )
    return found
'''
Search through the list of detected objects using Wordnet hypernyms to guide
the detection process. This is only called when a direct label matching does not
succeed but there may be a suitable candidate in the image: e.g. looking for
"bus" but the detected object is "trolleybus" or "minibus".
'''
def wordnet_search(self, predictions_df, labels_df, target_label):
    """Find a detection whose WordNet hypernyms include `target_label`.

    Walks the labels in descending order of best confidence; for each, takes
    the best-scoring proposal row and checks whether any lemma of the first
    noun synset's hypernyms equals the target.  Returns the detection data
    [row index, confidence Series sorted descending] or None when nothing
    matches.
    """
    # Get the top predictions for this image and sort them from highest to
    # lowest confidence
    if self.args.verbose:
        print "Wordnet-searching for %s" % target_label
    # NOTE(review): Series.sort(ascending=...) is the pre-0.20 pandas
    # in-place API, used consistently throughout this file.
    top = predictions_df.max()
    top.sort(ascending=False)
    for idx,x in enumerate(top.index):
        # Duplicate column names yield a DataFrame rather than a Series;
        # skip those since argmax() below needs a single column.
        if type(predictions_df[x]) == pd.DataFrame:
            continue
        df_row = predictions_df[x].argmax()
        detection_data = [df_row, pd.Series(self.df['prediction'].iloc[df_row], index=labels_df['name'])]
        detection_data[1].sort(ascending=False) # Sort by confidence over labels for each detected object
        label = detection_data[1].index[0]
        try:
            # '%s.n.01' = first noun sense; flatten hypernym synsets to
            # their lemma names for the comparison below.
            hypernym_labels = wn.synset('%s.n.01' % label).hypernyms()
            hypernym_lemmas = [x.lemmas() for x in hypernym_labels]
            hypernym_lemmas = [x for h in hypernym_lemmas for x in h]
            hypernym_lemmas = [x.name() for x in hypernym_lemmas]
        except nltk.corpus.reader.wordnet.WordNetError:
            # This word does not have a synset
            hypernym_lemmas = []
        for hypernym in hypernym_lemmas:
            if hypernym == target_label:
                # NOTE(review): unlike the message above, this print is not
                # gated on self.args.verbose — confirm that is intentional.
                print "Found Wordnet-backoff for %s in %s" % (target_label, hypernym)
                return detection_data
    return None
'''
Search through the list of detected objects using the annotator-defined
clusters to guide the detection process. This is only called when both
direct label and Wordnet matching does not find a suitable candidate:
e.g. looking for "girl" -> "woman" in Wordnet but we want "person".
'''
def cluster_search(self, predictions_df, labels_df, target_label):
    """Find a detection for the cluster that `target_label` belongs to.

    Maps the target through the annotator-defined clusters and looks that
    clustered label up directly in the predictions.  Returns the detection
    data [row index, confidence Series sorted descending], or None when the
    clustered label is not a known detector label.
    """
    # We didn't find both a subject and an object using the original labels
    # so let's backoff to the clustered labels
    clusteredLabel = self.clustered_label(target_label)
    if self.args.verbose:
        print "Cluster-searching for %s" % clusteredLabel
    try:
        # Raises KeyError when the clustered label is not a column.
        detection = predictions_df[clusteredLabel]
        # NOTE(review): the Series type check only gates the log message;
        # the lookup below proceeds either way — confirm that is intended.
        if type(detection) == pd.Series and self.args.verbose:
            print "Found cluster-backoff for %s in %s" % (target_label, clusteredLabel)
        df_row = predictions_df[clusteredLabel].argmax()
        detection = [df_row, pd.Series(self.df['prediction'].iloc[df_row], index=labels_df['name'])]
        detection[1].sort(ascending=False) # Sort by confidence over labels for each detected object
    except KeyError:
        detection = None
    return detection
'''
Get the highest-ranked bounding boxes for the subject and object from the data
for this specific image.
Returns None if we cannot match both the subject and object.
'''
def single_image(self, image_name, df, predictions_df, labels_df):
    """Find the highest-ranked bounding boxes for the requested subject and
    object (plus optional background labels) in a single image.

    Matching backs off in three stages: direct label lookup, WordNet
    hypernym search, then annotator-defined cluster search.  Background
    labels (self.args.others) are matched via the clustered representation
    and filtered with non-maximum suppression.

    Returns True after writing the detections to disk when both a subject
    and an object were found, otherwise False.
    """
    split_image_name = image_name.split("/")
    pure_image_name = re.sub(r"-[1-3]", "", split_image_name[-1])
    pure_image_name = re.sub(r"hdf", "jpg", pure_image_name)
    df_image_name = pure_image_name
    if self.args.sub is not None and self.args.obj is not None:
        # Best prediction per label, sorted from highest to lowest
        # confidence (pre-0.20 pandas in-place sort, as used file-wide).
        top = predictions_df.max()
        top.sort(ascending=False)
        # Accepted detections: label -> [row index, confidence Series].
        detections = dict()
        try:
            # Direct lookup raises KeyError when the label is unknown.
            subj = predictions_df[self.args.sub]
            print("Found %s directly" % (self.args.sub))
            df_row = predictions_df[self.args.sub].argmax()
            subj = [df_row, pd.Series(self.df['prediction'].iloc[df_row], index=labels_df['name'])]
            subj[1].sort(ascending=False)  # sort confidences over labels
        except KeyError:
            subj = self.wordnet_search(predictions_df, labels_df, self.args.sub)
            if not subj:
                subj = self.cluster_search(predictions_df, labels_df, self.args.sub)
        if subj:
            detections[self.args.sub] = subj
        print("")
        try:
            obj = predictions_df[self.args.obj]
            print("Found %s directly" % (self.args.obj))
            df_row = predictions_df[self.args.obj].argmax()
            obj = [df_row, pd.Series(self.df['prediction'].iloc[df_row], index=labels_df['name'])]
            obj[1].sort(ascending=False)  # sort confidences over labels
        except KeyError:
            obj = self.wordnet_search(predictions_df, labels_df, self.args.obj)
            if not obj:
                obj = self.cluster_search(predictions_df, labels_df, self.args.obj)
        if obj:
            detections[self.args.obj] = obj
        print("")
        # Try to find the background objects and add these to the detections
        # We go straight to the clustered representation here.
        for back in self.args.others:
            clustered_back = self.clustered_label(back)
            for idx, x in enumerate(top.index):
                # BUG FIX: argmax() was wrapped in a one-element list here,
                # which made df['prediction'].iloc[df_row] return a Series
                # of arrays instead of one vector — unlike the identical
                # loops in wordnet_search()/extract_topn(), and incompatible
                # with the scalar row index nms_discard()/write_detections()
                # expect.  Use the scalar row index directly.
                df_row = predictions_df[x].argmax()
                detection_data = [df_row, pd.Series(df['prediction'].iloc[df_row], index=labels_df['name'])]
                detection_data[1].sort(ascending=False)  # sort confidences
                label = detection_data[1].index[0]
                clustered_label = self.clustered_label(detection_data[1].index[0])
                if not self.already_detected(label, detections, True):
                    if clustered_label == clustered_back:
                        # Keep only proposals that do not overlap an already
                        # accepted detection of the same class.
                        if not self.nms_discard(detection_data, detections, df):
                            detections[clustered_label] = detection_data
                            detections[clustered_label][1].sort(ascending=False)
        if len(detections) == 2:
            self.write_detections(df, detections, split_image_name[-1], self.args.output)
            return True
    return False
def extract_topn(self, df, predictions_df, labels_df):
    """Extract the top-N highest-confidence, non-overlapping detections and
    write them to disk in LabelMe XML format.

    df: raw per-proposal DataFrame (bounding boxes + prediction vectors).
    predictions_df: proposals x labels confidence matrix.
    labels_df: synset_id/name table for the detector's label set.
    N comes from self.args.n.
    """
    image = self.args.imagefilename
    if self.args.verbose:
        print("Extracting the top %d predictions for %s" % (self.args.n, image))
    split_image_name = self.args.imagefilename.split("/")
    pure_image_name = re.sub(r"-[1-3]", "", split_image_name[-1])
    df_image_name = "%s/%s" % ("/export/scratch2/elliott/caffe/data/vlt", pure_image_name)
    # Best prediction per label, sorted from highest to lowest confidence
    # (pre-0.20 pandas in-place sort, as used throughout this file).
    top = predictions_df.max()
    top.sort(ascending=False)
    # Accepted detections: label -> [row index in df, confidence Series].
    detections = dict()
    for idx, x in enumerate(top.index):
        df_row = predictions_df[x].argmax()
        detection_data = [df_row, pd.Series(df['prediction'].iloc[df_row], index=labels_df['name'])]
        detection_data[1].sort(ascending=False)  # sort confidences over labels
        label = detection_data[1].index[0]
        if label not in detections:
            # Keep the proposal only when it does not overlap an already
            # accepted detection of the same class (was `== False`).
            if not self.nms_discard(detection_data, detections, df):
                detections[label] = detection_data
        if len(detections) >= self.args.n:
            break
    self.write_detections(df, detections, image, self.args.output)
'''
Write the detected objects to an LabelMe XML-style file on disk.
Creates a new file with the .semi.xml format to prevent overwriting the
existing file, and a new annotation of the image with the detected objects
as .semi.jpg.
Each annotation in the XML file contains the boundaries of the object and the
confidence of the detector.
We also run the graphviz identify command to get the dimensions of the image.
'''
def write_detections(self, original_df, prediction_data, image_name, output_dir):
    """Write detections to a LabelMe-style .semi.xml file and a PDF overlay.

    original_df: per-proposal DataFrame with xmin/xmax/ymin/ymax columns.
    prediction_data: label -> [row index, confidence Series] detections.
    image_name: used to derive the visualisation output filename.
    output_dir: directory containing the source .jpg and receiving outputs.
    Also shells out to ImageMagick's `identify` for the image dimensions.
    """
    # Order the detections from highest to lowest top confidence.
    sorted_predictions = prediction_data.items()
    sorted_predictions = sorted(sorted_predictions, key=lambda x: x[1][1][0], reverse=True)
    xml_output_name = re.sub(r".hdf", ".semi.xml", self.args.imagefilename.split("/")[-1])
    jpg_name = re.sub(r"-[\d]","", self.args.imagefilename.split("/")[-1])
    jpg_name = re.sub(r"hdf","jpg", jpg_name)
    output = open("%s/%s" % (output_dir, xml_output_name), "w")
    # Open a new plotting output so we can write the annotations
    # directly into the image file.
    im = plt.imread("%s/%s" % (output_dir, jpg_name))
    ax = plt.subplot(111)
    currentAxis = plt.gca()
    ax.imshow(im)
    output.write("<annotation>\n")
    output.write(" <filename>%s</filename>\n" % jpg_name)
    output.write(" <folder></folder>\n")
    output.write(" <source><sourceImage>Caffe RCNN</sourceImage></source>\n")
    output.write(" <sourceAnnotation>Caffe RCNN</sourceAnnotation>\n")
    # One distinct colour per detection for the overlay rectangles.
    color=iter(cm.Set1(np.linspace(0,1,len(prediction_data)+1)))
    if self.args.verbose:
        print "Saving the following detections to disk:"
    for idx,detection in enumerate(sorted_predictions):
        # Iterate through the detections and write them into the XML file
        # NOTE(review): .next() is the Python 2 iterator protocol; next()
        # would be needed under Python 3.
        bordercolor = color.next()
        label = detection[0]
        df_idx = detection[1][0]
        confidence = detection[1][1][0]
        if self.args.verbose:
            print "%s | conf: %f" % (label, confidence)
        xmin = original_df.iloc[df_idx]['xmin']
        xmax = original_df.iloc[df_idx]['xmax']
        ymin = original_df.iloc[df_idx]['ymin']
        ymax = original_df.iloc[df_idx]['ymax']
        output.write(" <object>\n")
        output.write(" <name>%s</name>\n" % label)
        output.write(" <deleted>0</deleted>\n")
        output.write(" <verified>0</verified>\n")
        output.write(" <date>0</date>\n")
        output.write(" <id>%d</id>\n" % idx)
        output.write(" <polygon>\n")
        output.write(" <pt><x>%d</x><y>%d</y></pt>\n" % (xmin, ymin))
        output.write(" <pt><x>%d</x><y>%d</y></pt>\n" % (xmin, ymax))
        output.write(" <pt><x>%d</x><y>%d</y></pt>\n" % (xmax, ymax))
        output.write(" <pt><x>%d</x><y>%d</y></pt>\n" % (xmax, ymin))
        output.write(" </polygon>\n")
        output.write(" <confidence>%s</confidence>\n" % confidence)
        output.write(" <color>%f,%f,%f</color>\n " % (bordercolor[0], bordercolor[1], bordercolor[2]))
        output.write(" </object>\n")
        # Add the detected object to the annotated image file we are creating
        coords = (xmin, ymin), xmax-xmin, ymax-ymin
        currentAxis.add_patch(plt.Rectangle(*coords, fill=False, linewidth=5, edgecolor=bordercolor, label="%s %.2f" % (label, confidence)))
    # Close the object annotations plot
    ax.axis("off")
    plt.savefig("%s/%s-objects.pdf" % (output_dir, re.sub(r".hdf", "", image_name.split("/")[-1])), bbox_inches='tight')
    if self.args.verbose:
        print "Visualised detection output to %s" % ("%s/%s-objects.pdf" % (output_dir, re.sub(r".hdf", "", image_name)))
    plt.close()
    # `identify` prints "<file> <fmt> <W>x<H> ..."; take the WxH token.
    size = subprocess.check_output(['identify', "%s/%s" % (output_dir, jpg_name)])
    size = size.split(" ")
    size = size[2].split("x")
    output.write(" <imagesize>\n")
    # NOTE(review): size[0] is the image WIDTH from "WxH" but is written as
    # <nrows>; nrows conventionally means height — looks swapped, confirm
    # against whatever consumes these XML files before changing.
    output.write(" <nrows>%s</nrows>\n" % size[0])
    output.write(" <ncols>%s</ncols>\n" % size[1])
    output.write(" </imagesize>\n")
    output.write("</annotation>")
    output.close()
    if self.args.verbose:
        print "Wrote predictions to %s" % xml_output_name
'''
We need to discard proposals that overlap with higher confidence detections.
'''
def nms_discard(self, proposal, accepted_detections, dataframe):
    """Non-maximum suppression check for a proposed detection.

    proposal: [row index into dataframe, confidence Series sorted desc].
    accepted_detections: label -> detection data in the same format.
    dataframe: per-proposal boxes with xmin/xmax/ymin/ymax columns.

    Returns True when the proposal overlaps (IoU > 0.3) an already accepted
    detection carrying the same top label, otherwise False.
    (The original had an unreachable `break` after `return True`; removed.)
    """
    p_idx = proposal[0]
    p_label = proposal[1].index[0]
    p_row = dataframe.iloc[p_idx]
    p_poly = Polygon([(p_row['xmin'], p_row['ymin']), (p_row['xmax'], p_row['ymin']),
                      (p_row['xmax'], p_row['ymax']), (p_row['xmin'], p_row['ymax'])])
    for detection in accepted_detections.values():
        d_idx = detection[0]
        d_label = detection[1].index[0]
        # No point checking overlap against a different class of object.
        if d_label != p_label:
            continue
        d_row = dataframe.iloc[d_idx]
        d_poly = Polygon([(d_row['xmin'], d_row['ymin']), (d_row['xmax'], d_row['ymin']),
                          (d_row['xmax'], d_row['ymax']), (d_row['xmin'], d_row['ymax'])])
        # Intersection-over-union: discard when overlap exceeds 30%.
        intersection = p_poly.intersection(d_poly)
        union = p_poly.union(d_poly)
        if intersection.area / union.area > 0.3:
            return True
    return False
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Extract the RCNN object region proposals from an HDF file.')
    # dest='imagefilename' because the extractor reads args.imagefilename
    # throughout; the command-line flag itself is unchanged.
    parser.add_argument('--hdffile', dest='imagefilename', help='the HDF file to read the data from')
    parser.add_argument('-n', type=int, help='number of detections to return', default=5)
    parser.add_argument('--output', required=0, help='optional location to write the LabelMe compatible XML output')
    parser.add_argument('--clustersfile', required=0, help="Optional list of objects to extract, if not provided then all object types will be deemed acceptable.")
    parser.add_argument("--sub", required=0, help="Look for a specific subject?")
    parser.add_argument("--obj", required=0, help="Look for a specific object?")
    # single_image() iterates args.others; previously this attribute was
    # never defined, so training mode crashed with AttributeError.
    parser.add_argument("--others", nargs="*", default=[], help="Optional background object labels to also detect")
    parser.add_argument("--image", required=0, help="Extract from a specific image?")
    parser.add_argument("--verbose", help="Should the output be verbose?", action="store_true")
    parser.add_argument("--training", help="Are we extracting training data or test data?", action="store_true", default=True)
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    # BUG FIX: was `main(parse.parse_args())` -- `parse` is undefined and
    # raised NameError before any arguments were parsed.
    main(parser.parse_args())