def __getitem__(self, idx):
    """Load one video clip together with its processed skeleton data.

    Args:
        idx: Index into ``self.datalist``.

    Returns:
        A tuple ``(inputs, targets, meta_info)`` of dicts:

        * ``inputs`` -- ``'video'`` (float32 array scaled by 1/255),
          ``'pose_coords'`` and ``'pose_scores'``.
        * ``targets`` -- ``'action_label'``.
        * ``meta_info`` -- ``'img_id'``.
    """
    # Deep-copy the entry so nothing below can modify self.datalist.
    data = copy.deepcopy(self.datalist[idx])
    img_path = data['img_path']
    skeleton_path = data['skeleton_path']
    original_shape = data['original_shape']
    action_label = data['action_label']
    frame_num = data['frame_num']
    start_frame_idx = data['start_frame_idx']

    # video load
    video, skeleton_frame_idxs = load_video(img_path, frame_num,
                                            start_frame_idx)
    # Axes 1 and 2 are taken as the spatial size before the transpose below
    # (presumably (height, width) of a frame-major, channel-last array --
    # TODO confirm against load_video).
    resized_shape = video.shape[1:3]

    # augmentation; the inverse (aug -> img) transform is not needed here.
    video, img2aug_trans, _, do_flip = augmentation(video, self.data_split)
    # frame_num, channel_dim, height, width
    video = video.transpose(0, 3, 1, 2).astype(np.float32) / 255.

    # skeleton information load
    pose_coords, pose_scores = self.load_skeleton(
        skeleton_path, skeleton_frame_idxs, original_shape, resized_shape)

    # process skeleton information with the same augmentation transform and
    # flip decision that were applied to the video.
    pose_coords, pose_scores = process_skeleton(
        pose_coords, pose_scores, img2aug_trans, do_flip, self.flip_pairs,
        self.joint_num, resized_shape)

    inputs = {
        'video': video,
        'pose_coords': pose_coords,
        'pose_scores': pose_scores,
    }
    targets = {'action_label': action_label}
    meta_info = {'img_id': data['img_id']}
    return inputs, targets, meta_info
# NOTE(review): the statements down to `return image, done` are the tail of a
# function whose header lies outside this excerpt. The nesting shown here is
# reconstructed from flattened source text -- confirm against the original.
if len(losses) > 50:
    # Start from "done" and clear the flag if any examined step still
    # improved by more than the relative tolerance.
    done = True
    for i in range(-51, -2):
        # Compares consecutive recorded losses; the drop is measured
        # relative to the earlier value via loss_tolerance * losses[i].
        # NOTE(review): the last pair examined is (losses[-3], losses[-2]);
        # losses[-1] is never looked at -- confirm this is intended.
        if (losses[i] - losses[i + 1]) > loss_tolerance * losses[i]:
            done = False
# The current loss is recorded after the check above has run.
losses.append(loss)
return image, done


# Script entry point: loads the style image and the input video, then sets up
# the containers and layer-name lists defined below.
if __name__ == "__main__":
    style_path = "../images/style.jpg"
    # Second return value of load_img is discarded.
    style_image, _ = load_img(style_path)
    # frame_interval is defined outside this excerpt.
    images, yuvs = load_video("../videos/cat2.mp4", frame_interval)
    styled_images = []
    losses = []
    # Wall-clock start time captured via time.time().
    start = time.time()
    # Layer names used for content and style respectively (presumably layers
    # of a pretrained network -- TODO confirm which model consumes them).
    content_layers = ["block5_conv2"]
    style_layers = [
        "block1_conv1",
        "block2_conv1",
        "block3_conv1",
        "block4_conv1",
        "block5_conv1",
    ]