(args.W, args.H), interpolation=cv2.INTER_AREA) / 255. frame_objs = [] frame_rels = [] ids_cur_frame = [] for j in range(n_objects): id = get_identifier(objects[j]) if check_contain_id(id, ids_filter) and not check_contain_id( id, ids_cur_frame): ids_cur_frame.append(id) material = objects[j]['material'] shape = objects[j]['shape'] attr = encode_attr(material, shape, bbox_size, args.attr_dim) mask_raw = decode(objects[j]['mask']) mask = cv2.resize(mask_raw, (W, H), interpolation=cv2.INTER_NEAREST) # cv2.imshow('mask', mask * 255) # cv2.waitKey(0) bbox, pos = convert_mask_to_bbox(mask_raw, H, W, bbox_size) pos_mean = torch.FloatTensor(np.array([H / 2., W / 2.])) pos_mean = pos_mean.unsqueeze(1).unsqueeze(1) pos_std = pos_mean pos = normalize(pos, pos_mean, pos_std) mask_crop = normalize(crop(mask, bbox, H, W), 0.5, 1).unsqueeze(0)
def __getitem__(self, idx): #pdb.set_trace() n_his = self.args.n_his frame_offset = self.args.frame_offset idx_video, idx_frame = self.valid_idx[idx][0], self.valid_idx[idx][1] objs = [] attrs = [] hws = [] for i in range(idx_frame - n_his * frame_offset, idx_frame + frame_offset + 1, frame_offset): frame = self.metadata[idx_video]['frames'][i] #frame_filename = frame['frame_filename'] frame_filename = os.path.join( 'video_' + str(idx_video).zfill(5), str(frame['frame_index'] + 1) + '.png') #pdb.set_trace() objects = frame['objects'] n_objects = len(objects) sub_id = idx_video // 1000 full_img_dir = os.path.join( self.data_dir, 'image_' + str(sub_id * 1000).zfill(5) + '-' + str( (sub_id + 1) * 1000).zfill(5)) img = self.loader(os.path.join(full_img_dir, frame_filename)) img = np.array(img)[:, :, ::-1].copy() img = cv2.resize(img, (self.W, self.H), interpolation=cv2.INTER_AREA).astype( np.float) / 255. ### prepare object inputs object_inputs = [] for j in range(n_objects): material = objects[j]['material'] shape = objects[j]['shape'] if i == idx_frame - n_his * frame_offset: attrs.append( encode_attr(material, shape, self.bbox_size, self.args.attr_dim)) mask_raw = decode(objects[j]['mask']) mask = cv2.resize(mask_raw, (self.W, self.H), interpolation=cv2.INTER_NEAREST) # cv2.imshow("mask", mask * 255) # cv2.waitKey(0) #bbox, pos, box_hw = convert_mask_to_bbox_hw(mask_raw, self.H, self.W, self.bbox_size, objects[j]['mask']) bbox, pos = convert_mask_to_bbox(mask_raw, self.H, self.W, self.bbox_size) pos_mean = torch.FloatTensor( np.array([self.H / 2., self.W / 2.])) pos_mean = pos_mean.unsqueeze(1).unsqueeze(1) pos_std = pos_mean pos = normalize(pos, pos_mean, pos_std) mask_crop = normalize(crop(mask, bbox, self.H, self.W), 0.5, 1).unsqueeze(0) img_crop = normalize(crop(img, bbox, self.H, self.W), 0.5, 0.5).permute(2, 0, 1) if self.args.box_only_flag: bbx_xyxy, ret, crop_box, crop_box_v2 = decode_mask_to_box( objects[j]['mask'], [self.bbox_size, self.bbox_size], self.H, self.W) ret_mean = torch.FloatTensor( np.array([1 / 2., 1 / 2., 1 / 2., 1 / 2.])) ret_mean = ret_mean.unsqueeze(1).unsqueeze(1) ret_std = ret_mean ret = normalize(ret, ret_mean, ret_std) pos = ret[:2] hw = ret[2:] elif self.args.add_hw_state_flag: bbx_xyxy, ret, crop_box, crop_box_v2 = decode_mask_to_box( objects[j]['mask'], [self.bbox_size, self.bbox_size], self.H, self.W) ret_mean = torch.FloatTensor( np.array([1 / 2., 1 / 2., 1 / 2., 1 / 2.])) ret_mean = ret_mean.unsqueeze(1).unsqueeze(1) ret_std = ret_mean ret = normalize(ret, ret_mean, ret_std) hw = ret[2:] elif self.args.add_xyhw_state_flag: bbx_xyxy, ret, crop_box, crop_box_v2 = decode_mask_to_box( objects[j]['mask'], [self.bbox_size, self.bbox_size], self.H, self.W) ret_mean = torch.FloatTensor( np.array([1 / 2., 1 / 2., 1 / 2., 1 / 2.])) ret_mean = ret_mean.unsqueeze(1).unsqueeze(1) ret_std = ret_mean ret = normalize(ret, ret_mean, ret_std) pos = ret[:2] hw = ret[2:] identifier = get_identifier(objects[j]) if self.args.box_only_flag: s = torch.cat([pos, hw], 0).unsqueeze(0), identifier elif self.args.add_hw_state_flag or self.args.add_xyhw_state_flag: s = torch.cat([mask_crop, pos, img_crop, hw], 0).unsqueeze(0), identifier elif self.args.rm_mask_state_flag: s = torch.cat([mask_crop * 0, pos, img_crop], 0).unsqueeze(0), identifier else: s = torch.cat([mask_crop, pos, img_crop], 0).unsqueeze(0), identifier object_inputs.append(s) objs.append(object_inputs) attr = torch.cat(attrs, 0).view(n_objects, self.args.attr_dim, self.bbox_size, self.bbox_size) feats = [] for x in range(n_objects): feats.append(objs[0][x][0]) for i in range(1, len(objs)): for x in range(n_objects): for y in range(n_objects): id_x = objs[0][x][1] id_y = objs[i][y][1] if check_same_identifier(id_x, id_y): feats[x] = torch.cat([feats[x], objs[i][y][0]], 1) try: feats = torch.cat(feats, 0) except: print(idx_video, idx_frame) # print("feats shape", feats.size()) ### prepare relation attributes n_relations = n_objects * n_objects Ra = torch.FloatTensor( np.ones((n_relations, self.args.relation_dim * (self.args.n_his + 2), self.bbox_size, self.bbox_size)) * -0.5) # change to relative position relation_dim = self.args.relation_dim state_dim = self.args.state_dim if self.args.box_only_flag: for i in range(n_objects): for j in range(n_objects): idx = i * n_objects + j Ra[idx, 1::relation_dim] = feats[i, 0::state_dim] - feats[ j, 0::state_dim] # x Ra[idx, 2::relation_dim] = feats[i, 1::state_dim] - feats[ j, 1::state_dim] # y else: for i in range(n_objects): for j in range(n_objects): idx = i * n_objects + j Ra[idx, 1::relation_dim] = feats[i, 1::state_dim] - feats[ j, 1::state_dim] # x Ra[idx, 2::relation_dim] = feats[i, 2::state_dim] - feats[ j, 2::state_dim] # y # add collision attr gt = self.metadata[idx_video]['ground_truth'] gt_ids = gt['objects'] gt_collisions = gt['collisions'] label_rel = torch.FloatTensor( np.ones((n_objects * n_objects, 1)) * -0.5) if self.args.edge_superv: for i in range(idx_frame - n_his * frame_offset, idx_frame + frame_offset + 1, frame_offset): for j in range(len(gt_collisions)): frame_id = gt_collisions[j]['frame'] if 0 <= frame_id - i < self.args.frame_offset: id_0 = gt_collisions[j]['object'][0] id_1 = gt_collisions[j]['object'][1] for k in range(len(gt_ids)): if id_0 == gt_ids[k]['id']: id_x = get_identifier(gt_ids[k]) if id_1 == gt_ids[k]['id']: id_y = get_identifier(gt_ids[k]) # id_0 = get_identifier(gt_ids[gt_collisions[j]['object'][0]]) # id_1 = get_identifier(gt_ids[gt_collisions[j]['object'][1]]) for k in range(n_objects): if check_same_identifier(objs[0][k][1], id_x): x = k if check_same_identifier(objs[0][k][1], id_y): y = k idx_rel_xy = x * n_objects + y idx_rel_yx = y * n_objects + x # print(x, y, n_objects) idx = i - (idx_frame - n_his * frame_offset) idx /= frame_offset Ra[idx_rel_xy, int(idx) * relation_dim] = 0.5 Ra[idx_rel_yx, int(idx) * relation_dim] = 0.5 if i == idx_frame + frame_offset: label_rel[idx_rel_xy] = 1 label_rel[idx_rel_yx] = 1 ''' print(feats[0, -state_dim]) print(feats[0, -state_dim+1]) print(feats[0, -state_dim+2]) print(feats[0, -state_dim+3]) print(feats[0, -state_dim+4]) ''' ''' ### change absolute pos to relative pos feats[:, state_dim+1::state_dim] = \ feats[:, state_dim+1::state_dim] - feats[:, 1:-state_dim:state_dim] # x feats[:, state_dim+2::state_dim] = \ feats[:, state_dim+2::state_dim] - feats[:, 2:-state_dim:state_dim] # y feats[:, 1] = 0 feats[:, 2] = 0 ''' x = feats[:, :-state_dim] label_obj = feats[:, -state_dim:] if self.args.box_only_flag: label_obj[:, 1] -= feats[:, -2 * state_dim + 1] label_obj[:, 2] -= feats[:, -2 * state_dim + 2] label_obj[:, 0] -= feats[:, -2 * state_dim + 0] label_obj[:, 3] -= feats[:, -2 * state_dim + 3] else: label_obj[:, 1] -= feats[:, -2 * state_dim + 1] label_obj[:, 2] -= feats[:, -2 * state_dim + 2] rel = prepare_relations(n_objects) rel.append(Ra[:, :-relation_dim]) ''' print(rel[-1][0, 0]) print(rel[-1][0, 1]) print(rel[-1][0, 2]) print(rel[-1][2, 3]) print(rel[-1][2, 4]) print(rel[-1][2, 5]) ''' # print("attr shape", attr.size()) # print("x shape", x.size()) # print("label_obj shape", label_obj.size()) # print("label_rel shape", label_rel.size()) ''' for i in range(n_objects): print(objs[0][i][1]) print(label_obj[i, 1]) time.sleep(10) ''' return attr, x, rel, label_obj, label_rel
def __getitem__(self, idx): #pdb.set_trace() n_his = self.args.n_his frame_offset = self.args.frame_offset idx_video, idx_frame = self.valid_idx[idx][0], self.valid_idx[idx][1] objs = [] attrs = [] for i in range(idx_frame - n_his * frame_offset, idx_frame + frame_offset + 1, frame_offset): frame = self.metadata[idx_video]['proposals']['frames'][i] #frame_filename = frame['frame_filename'] frame_filename = os.path.join( 'video_' + str(idx_video).zfill(5), str(frame['frame_index'] + 1) + '.png') objects = frame['objects'] n_objects = len(objects) vid = int(idx_video / 1000) ann_full_dir = os.path.join( self.data_dir, 'image_%02d000-%02d000' % (vid, vid + 1)) img = self.loader(os.path.join(ann_full_dir, frame_filename)) img = np.array(img)[:, :, ::-1].copy() img = cv2.resize(img, (self.W, self.H), interpolation=cv2.INTER_AREA).astype( np.float) / 255. ### prepare object inputs object_inputs = [] for j in range(n_objects): material = objects[j]['material'] shape = objects[j]['shape'] if i == idx_frame - n_his * frame_offset: attrs.append( encode_attr(material, shape, self.bbox_size, self.args.attr_dim)) bbox_xyxy, xyhw_exp, crop_box, crop_box_v2 = decode_mask_to_box(objects[j]['mask'],\ [self.bbox_size, self.bbox_size], self.H, self.W) #img_crop = normalize(crop(img, crop_box, self.H, self.W), 0.5, 0.5).permute(2, 0, 1) img_crop = normalize(crop(img, crop_box_v2, self.H, self.W), 0.5, 0.5).permute(2, 0, 1) tube_id = utilsTube.get_tube_id_from_bbox( bbox_xyxy, frame['frame_index'], self.metadata[idx_video]['tubes']) if tube_id == -1: pdb.set_trace() if self.args.box_only_flag: xyhw_norm = (xyhw_exp - 0.5) / 0.5 s = torch.cat([xyhw_norm], 0).unsqueeze(0), tube_id elif self.args.new_mode == 1: xyhw_norm = (xyhw_exp - 0.5) / 0.5 s = torch.cat([xyhw_norm, img_crop], 0).unsqueeze(0), tube_id else: s = torch.cat([xyhw_exp, img_crop], 0).unsqueeze(0), tube_id object_inputs.append(s) objs.append(object_inputs) attr = torch.cat(attrs, 0).view(n_objects, self.args.attr_dim, self.bbox_size, self.bbox_size) feats = [] for x in range(n_objects): feats.append(objs[0][x][0]) for i in range(1, len(objs)): for x in range(n_objects): for y in range(n_objects): id_x = objs[0][x][1] id_y = objs[i][y][1] if id_x == id_y: feats[x] = torch.cat([feats[x], objs[i][y][0]], 1) try: feats = torch.cat(feats, 0) except: print(idx_video, idx_frame) #pdb.set_trace() ### prepare relation attributes n_relations = n_objects * n_objects Ra = torch.FloatTensor( np.ones((n_relations, self.args.relation_dim * (self.args.n_his + 2), self.bbox_size, self.bbox_size)) * -0.5) # change to relative position relation_dim = self.args.relation_dim state_dim = self.args.state_dim if self.args.box_only_flag or self.args.new_mode == 1: for i in range(n_objects): for j in range(n_objects): idx = i * n_objects + j Ra[idx, 1::relation_dim] = feats[i, 0::state_dim] - feats[ j, 0::state_dim] # x Ra[idx, 2::relation_dim] = feats[i, 1::state_dim] - feats[ j, 1::state_dim] # y else: for i in range(n_objects): for j in range(n_objects): idx = i * n_objects + j Ra[idx, 1::relation_dim] = feats[i, 0::state_dim] - feats[ j, 0::state_dim] # x Ra[idx, 2::relation_dim] = feats[i, 1::state_dim] - feats[ j, 1::state_dim] # y Ra[idx, 3::relation_dim] = feats[i, 2::state_dim] - feats[ j, 2::state_dim] # h Ra[idx, 4::relation_dim] = feats[i, 3::state_dim] - feats[ j, 3::state_dim] # w label_rel = torch.FloatTensor( np.ones((n_objects * n_objects, 1)) * -0.5) ''' ### change absolute pos to relative pos feats[:, state_dim+1::state_dim] = \ feats[:, state_dim+1::state_dim] - feats[:, 1:-state_dim:state_dim] # x feats[:, state_dim+2::state_dim] = \ feats[:, state_dim+2::state_dim] - feats[:, 2:-state_dim:state_dim] # y feats[:, 1] = 0 feats[:, 2] = 0 ''' #pdb.set_trace() x = feats[:, :-state_dim] label_obj = feats[:, -state_dim:] label_obj[:, 0] -= feats[:, -2 * state_dim + 0] label_obj[:, 1] -= feats[:, -2 * state_dim + 1] label_obj[:, 2] -= feats[:, -2 * state_dim + 2] label_obj[:, 3] -= feats[:, -2 * state_dim + 3] rel = prepare_relations(n_objects) rel.append(Ra[:, :-relation_dim]) ''' print(rel[-1][0, 0]) print(rel[-1][0, 1]) print(rel[-1][0, 2]) print(rel[-1][2, 3]) print(rel[-1][2, 4]) print(rel[-1][2, 5]) ''' # print("attr shape", attr.size()) # print("x shape", x.size()) # print("label_obj shape", label_obj.size()) # print("label_rel shape", label_rel.size()) ''' for i in range(n_objects): print(objs[0][i][1]) print(label_obj[i, 1]) time.sleep(10) ''' return attr, x, rel, label_obj, label_rel