This repository has been archived by the owner on Nov 16, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
convert_mpii.py
executable file
·274 lines (237 loc) · 9.95 KB
/
convert_mpii.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
#!/usr/bin/env python3
"""Conversion script for MPII Cooking Activities 2 dataset."""
from argparse import ArgumentParser
from glob import glob
from json import dumps
from multiprocessing import Pool
from os import path
import sys
from textwrap import wrap
from tqdm import tqdm
import numpy as np
from h5py import File
# Skeleton tree for the 8 upper-body joints: PARENTS[j] is the parent joint of
# joint j (joint 0 is the root, being its own parent). I'm pretty sure this is
# the same layout as IkeaDB, since the poses were produced by the same model
# (CPM).
PARENTS = [0, 0, 1, 2, 3, 1, 5, 6]
# Ignore things that aren't upper-body joints; the per-frame .mat files contain
# more joints than we keep, and GOOD_JOINTS selects the first 8 rows.
GOOD_JOINTS = range(8)
assert len(GOOD_JOINTS) == len(PARENTS)
# Collapse all knife-related verbs into a single 'knife actionV' class.
# Comments are copied from email to Anoop. Trust them over the code. Make sure
# that you check that all actions (including all actions appearing in merges)
# occur in the dataset. If they don't, then I've probably made a typo.
ACTION_MERGES = {knife_act: 'knife actionV' for knife_act in [
# cut apartV, cut diceV, cut off endsV, cut out insideV,
# cut stripesV, cutV, sliceV, chopV
'cut apartV', 'cut diceV', 'cut off endsV', 'cut out insideV',
'cut stripesV', 'cutV', 'sliceV', 'chopV'
]}
# Final label set. Index in this list is the categorical class ID written to
# the output HDF5 file; index 0 is the background/unknown class.
ACTION_LIST = [
'n/a', # unknown (not in email to Anoop)
'screw openV', # 1. Screw open
'pourV', # 2. Pour
'screw closeV', # 3. Screw close
'washV', # 4. Wash
'shakeV', # 5. Shake
'knife actionV', # 6. Combination of all knife actions
'addV', # 7. Add
'spiceV', # 8. Spice
'throw in garbageV', # 9. Throw in garbage
'put lidV', # 10. Put lid
'take lidV', # 11. Take lid
'rip openV', # 12. Rip open
'fillV', # 13. Fill
'stirV', # 14. Stir
'spreadV', # 15. Spread
'whipV', # 16. Whip
'open eggV', # 17. Open egg
'stampV', # 18. Stamp
]
# Accounts for temporal subsampling of frames applied before pose estimation.
# In this case, we've only got frames 1, 11, 21, etc.
FRAME_SKIP_FACTOR = 10
# Here are the fields in Anoop's MAT file (see MPIICA2_Actions18 in
# /data/home/cherian/MPII/MPII2/MPIICA2_Actions18.mat). Not using his MAT file
# yet, but will probably use it eventually.
#
# 6164x1 struct array with fields:
#
# video_id
# seq_name
# clip_path
# start_frame
# end_frame
# num_frames
# frame_name
# person_idx
# cropbox
# poses
# activity_id
# activity_labels
# pose_visibility
# frames
# dimensions
def load_str_cells(fp, path):
    """Load a Matlab cell array of strings from a path within a h5py File
    object.

    :param fp: open ``h5py.File`` for a Matlab v7.3 (HDF5) file.
    :param path: HDF5 path of the cell array dataset within ``fp``.
    :return: list of decoded ``str`` values, in flattened dataset order.
    """
    rv = []
    # Use `dset[()]` rather than the old `.value` attribute: `.value` was
    # deprecated and then removed in h5py 3.0, while `[()]` reads the whole
    # dataset on every h5py version.
    for r in fp[path][()].flatten():
        # Get around opaque reference objects stored in HDF5 array. I think
        # this is a way of doing string interning or storing heterogenous
        # datatypes (or both?).
        deref = fp[r][()]
        # Matlab stores the characters as an integer code array; reinterpret
        # the codes as bytes and decode as UTF-8.
        as_str = deref.astype('uint8').tobytes().decode('utf8')
        rv.append(as_str)
    return rv
def merge_acts(acts_arr):
    """Apply ACTION_MERGES to an array of action names, then map anything not
    in ACTION_LIST to the 'n/a' background class. Returns a new array; the
    input is not modified."""
    out = acts_arr.copy()
    # Sanity check: every action we intend to merge must actually occur.
    wanted = set(ACTION_MERGES.keys())
    present = set(out)
    assert wanted.issubset(present), wanted - present
    known = set(ACTION_LIST[1:])
    for pos in range(len(out)):
        name = out[pos]
        was_merged = name in ACTION_MERGES
        if was_merged:
            name = ACTION_MERGES[name]
            out[pos] = name
        if name not in known:
            assert not was_merged, "about to remove merged action (?! why " \
                "bother with merge if you drop it?)"
            out[pos] = name = 'n/a'
    # Every non-background action in ACTION_LIST should survive the merge.
    assert set(out).issuperset(known)
    return out
def acts_to_cat_vec(start_frames, end_frames, acts_by_time, all_acts,
                    num_frames):
    """Turn action names, start times and end times into a vector of
    categorical variables (i.e. non-negative bounded integers). Resolves
    conflicts (two actions happening at the same time) in favour of later
    action (since I'm not absolutely 100% sure that start and end times define
    closed intervals)."""
    assert max(end_frames) <= num_frames, "only %d frames, but end_frames " \
        "goes up to %d" % (num_frames, max(end_frames))
    labels = np.zeros((num_frames,), dtype='uint8')
    # Visit annotations in order of start frame so a later-starting action
    # overwrites any earlier one it overlaps with.
    for ann in np.argsort(start_frames):
        act_id = int(np.argwhere(all_acts == acts_by_time[ann]))
        assert 0 <= act_id <= 255, "act_id=%d won't fit in uint8" % act_id
        # +1 because [start, end] is treated as a closed interval
        labels[start_frames[ann]:end_frames[ann] + 1] = act_id
    return labels
def load_attrs(attr_path):
    """Load per-clip activity annotations from the MPII attributes .mat file
    (a Matlab v7.3/HDF5 file).

    :param attr_path: path to attributesAnnotations_MPII-Cooking-2.mat.
    :return: dict with keys ``name_to_id`` (video name -> numeric file ID),
        ``activities`` (merged action name per annotation), ``vid_ids``,
        ``start_frames`` and ``end_frames`` (parallel int arrays, one entry
        per annotation; frames zero-based and downsampled).
    """
    # Open read-only: matches load_seq, and old h5py versions default to
    # append mode, which fails on a read-only dataset file.
    with File(attr_path, 'r') as fp:
        # the inner fp[] gives us a reference
        # File names are stuff like '/BS/.../tsv/s37-d74-cam-002.tsv'
        vid_names = [path.basename(n)[:-4] for n in
                     load_str_cells(fp, '/annos/annoFileMap')]
        # IDs are 1-based, presumably to match the Matlab-side fileId values
        # compared against below — TODO confirm.
        vid_name_to_id = {}
        for idx, vid_name in enumerate(vid_names):
            vid_name_to_id[vid_name] = idx + 1

        # some discrete things are floats rather than ints ;_;
        def fai(p):
            # `[()]` instead of `.value`: the latter was removed in h5py 3.0.
            return fp[p][()].flatten().astype(int)

        # Subtract 1 to make them zero-based. They still use closed intervals,
        # IIRC. The division is to account for fact that a large fraction of
        # frames were dropped before pose processing.
        start_frames = (fai('/annos/startFrame') - 1) // FRAME_SKIP_FACTOR
        end_frames = (fai('/annos/endFrame') - 1) // FRAME_SKIP_FACTOR
        vid_ids = fai('/annos/fileId')
        activities = np.asarray(load_str_cells(fp, '/annos/activity'))
        activities = merge_acts(activities)
        return {
            'name_to_id': vid_name_to_id,
            'activities': activities,
            'vid_ids': vid_ids,
            'start_frames': start_frames,
            'end_frames': end_frames
        }
def load_seq(args):
    """Load poses and per-frame action labels for one video directory.

    :param args: ``(mat_dir, attr_dict)`` pair, packed into a single argument
        so this can be dispatched through ``Pool.imap``.
    :return: ``(joints, actions, scale)`` where ``joints`` is T*2*J, or
        ``(None, None, None)`` when the estimated scale looks implausible and
        the sequence should be skipped.
    """
    mat_dir, attr_dict = args
    # id_str will be left-zero-padded
    mat_paths = glob(path.join(mat_dir, '*.mat'))
    to_collate = {}
    for mat_path in mat_paths:
        t = int(path.basename(mat_path).split('.')[0])
        # these are Matlab v7.3 files, so we need to treat them as plain HDF5.
        # `[()]` instead of `.value` because the latter was removed in h5py 3.
        with File(mat_path, 'r') as fp:
            this_pose = fp['/pose'][()].T
        # will be J*2 matrix
        assert this_pose.ndim == 2 and this_pose.shape[1] == 2, this_pose.shape
        # discard lower body junk
        to_collate[t] = this_pose[GOOD_JOINTS]
    joints = np.zeros((len(to_collate), len(GOOD_JOINTS), 2), dtype='float')
    # Times seem to jump forward/backward by fixed amounts (10 frames?) not
    # sure why. Need something like this (probably inelegant implementation
    # here) to put things back in the right order.
    tj = 0
    for t in sorted(to_collate.keys()):
        joints[tj] = to_collate[t]
        tj += 1
    del to_collate
    # Normalise by median upper arm length; no idea whether this works.
    # NOTE(review): the two inline comments below ("shoulder/hip") disagree
    # with the "upper arm" note — confirm what joints 2/3 and 5/6 actually are.
    # left shoulder/right hip
    hsd_lr = np.linalg.norm(joints[:, 2] - joints[:, 3], axis=1)
    # right shoulder/left hip
    hsd_rl = np.linalg.norm(joints[:, 5] - joints[:, 6], axis=1)
    scale = np.median(np.concatenate((hsd_lr, hsd_rl)))
    # Make sure that scale is sane
    if abs(scale) < 40 or abs(scale) > 400:
        # Return a triple (not a bare None) so the caller's
        # `joints, actions, scale = triple` unpack cannot blow up.
        return None, None, None
    # Need to be T*XY*J
    joints = joints.transpose((0, 2, 1))
    assert joints.shape[1] == 2, joints.shape
    assert joints.shape[0] == len(mat_paths), joints.shape
    # now we make actions
    vid_name = path.basename(mat_dir.rstrip(path.sep))
    vid_mask = attr_dict['vid_ids'] == attr_dict['name_to_id'][vid_name]
    assert vid_mask.ndim == 1 and vid_mask.sum() >= 1, vid_mask.shape
    start_frames = attr_dict['start_frames'][vid_mask]
    end_frames = attr_dict['end_frames'][vid_mask]
    act_names = attr_dict['activities'][vid_mask]
    actions = acts_to_cat_vec(start_frames,
                              end_frames,
                              act_names,
                              np.asarray(ACTION_LIST),
                              len(joints))
    return joints, actions, scale
# Command-line interface. Defined at module level; parse_args() only runs
# under the __main__ guard below.
parser = ArgumentParser()
# Anoop provided the pose directory. Check
# /data/home/cherian/MPII/Cheng-MPII-Pose-Action/detected_poses/
parser.add_argument('pose_path', help='path to MPII pose dir (from CPM)')
# This is shipped with Cooking Activities 2. See
# /data/home/sam/mpii-cooking-2/attributesAnnotations_MPII-Cooking-2.mat (I
# think that's the right one, anyway).
parser.add_argument('attr_path', help='path to MPII attributes file (.mat)')
parser.add_argument('dest', help='path for HDF5 output file')
if __name__ == '__main__':
    args = parser.parse_args()
    dir_list = glob(path.join(args.pose_path, 's*-d*-cam-*'))
    attr_dict = load_attrs(args.attr_path)
    with File(args.dest, 'w') as fp:
        skipped = []
        # Fan sequence loading out over a process pool; imap preserves input
        # order, so zipping with dir_list keeps names and results aligned.
        with Pool() as p:
            seq_iter = p.imap(load_seq, ((d, attr_dict) for d in dir_list))
            zipper = zip(dir_list, seq_iter)
            for dir_path, triple in tqdm(zipper, total=len(dir_list)):
                id_str = path.basename(dir_path)
                # load_seq signals "skip this sequence" on implausible scale.
                # Guard against a bare None as well as a (None, None, None)
                # triple — unpacking a bare None would raise TypeError.
                if triple is None:
                    triple = (None, None, None)
                joints, actions, scale = triple
                if joints is None:
                    skipped.append(id_str)
                    continue
                prefix = '/seqs/' + id_str
                assert len(joints) == len(actions)
                fp[prefix + '/poses'] = joints
                fp[prefix + '/actions'] = actions
                fp[prefix + '/scale'] = scale
        # Store the action name list as JSON, encoded as a uint8 byte array
        # so h5py serialises it without string-type headaches.
        fp['/action_names'] = np.array([ord(c) for c in dumps(ACTION_LIST)],
                                       dtype='uint8')
        fp['/num_actions'] = len(ACTION_LIST)
        fp['/parents'] = np.array(PARENTS, dtype=int)
    if skipped:
        print('WARNING: skipped %i seq(s) due to scale:' % len(skipped),
              file=sys.stderr)
        # Keep the listing on stderr too so the whole warning goes to one
        # stream (previously the header and the list were split across
        # stderr and stdout).
        print('\n'.join(wrap(', '.join(skipped))), file=sys.stderr)