def video_load_crop(entry, input_size):
    """Load one video-matting training entry and build a cropped/resized sample.

    Args:
        entry: tuple ``(fg_path, bg_path, previous_path, flo_path)`` — paths to
            the foreground image (with alpha), background image, previous
            frame's foreground (for its alpha) and the optical-flow file.
        input_size: ``(width, height)`` tuple passed to ``cv2.resize``.

    Returns:
        Tuple ``(cmp, bg, label, warped_alpha, fg)`` where ``cmp`` is the
        VGG-mean-centred composite, ``bg`` the mean-centred background,
        ``label`` the ground-truth alpha with a trailing channel axis, and
        ``warped_alpha`` the previous alpha warped by the flow (3 channels).
    """
    fg_path, bg_path, previous_path, flo_path = entry
    alpha, fg = reader.read_fg_img(fg_path)
    # np.float was deprecated in NumPy 1.20 and removed in 1.24 — use the
    # explicit float64 dtype instead.
    fg = fg.astype(dtype=np.float64)  # potentially very big
    bg = cv2.imread(bg_path).astype(dtype=np.float64)
    flo = reader.read_flow(flo_path)
    prev_alpha, _ = reader.read_fg_img(previous_path)
    # Warp the previous frame's alpha forward along the flow, then replicate it
    # to 3 channels so it can be concatenated/cropped together with fg.
    warped_alpha = flow.warp_img(prev_alpha, flo)
    warped_alpha = np.repeat(warped_alpha[:, :, np.newaxis], 3, axis=2)
    crop_type = [(320, 320), (480, 480), (640, 640)]
    # we crop images of different sizes
    crop_h, crop_w = crop_type[np.random.randint(0, len(crop_type))]
    fg_h, fg_w = fg.shape[:2]
    if fg_h < crop_h or fg_w < crop_w:
        # in that case the image is not too big, and we have to add padding
        alpha = alpha.reshape((alpha.shape[0], alpha.shape[1], 1))
        cat = np.concatenate((fg, alpha, warped_alpha), axis=2)
        cropped_cat = get_padded_img(cat, crop_h, crop_w)
        # channels 0-2 -> fg, 3 -> alpha, 4-6 -> warped_alpha
        fg, alpha, warped_alpha = np.split(cropped_cat,
                                           indices_or_sections=[3, 4], axis=2)
    # otherwise, the fg is likely to be HRes: we directly crop it and dismiss
    # the original image to avoid manipulating big images
    fg_h, fg_w = fg.shape[:2]
    # randomly picks top-left corner
    i, j = np.random.randint(0, fg_h - crop_h + 1), np.random.randint(
        0, fg_w - crop_w + 1)
    # BUGFIX: the width slice previously used crop_h (j:j + crop_h); harmless
    # for the square crop_type values above, but wrong for rectangular crops.
    fg = fg[i:i + crop_h, j:j + crop_w]
    alpha = alpha[i:i + crop_h, j:j + crop_w]
    warped_alpha = warped_alpha[i:i + crop_h, j:j + crop_w]
    # Crop a background region with the same aspect handling as the fg crop,
    # padding the bg first so the crop always fits.
    bg_crop_h = int(np.ceil(crop_h * bg.shape[0] / fg.shape[0]))
    bg_crop_w = int(np.ceil(crop_w * bg.shape[1] / fg.shape[1]))
    padded_bg = get_padded_img(bg, bg_crop_h, bg_crop_w)
    i, j = np.random.randint(0, bg.shape[0] - bg_crop_h + 1), np.random.randint(
        0, bg.shape[1] - bg_crop_w + 1)
    cropped_bg = padded_bg[i:i + bg_crop_h, j:j + bg_crop_w]
    bg = cv2.resize(src=cropped_bg, dsize=input_size,
                    interpolation=cv2.INTER_LINEAR)
    fg = cv2.resize(fg, input_size, interpolation=cv2.INTER_LINEAR)
    alpha = cv2.resize(alpha, input_size, interpolation=cv2.INTER_LINEAR)
    warped_alpha = cv2.resize(warped_alpha, input_size,
                              interpolation=cv2.INTER_LINEAR)
    cmp = reader.create_composite_image(fg, bg, alpha)
    # Centre the network inputs on the VGG training mean.
    cmp -= params.VGG_MEAN
    bg -= params.VGG_MEAN
    label = alpha.reshape((alpha.shape[0], alpha.shape[1], 1))
    return cmp, bg, label, warped_alpha, fg
def augmentation(dim_dataset, voc_dataset, sig_dataset): """ create synthetic data for video matting by warping composite images (DIM matte / VOC background) """ n = 50 # we both take files from test and train datasets filepaths = [[os.path.join(dim_dataset, 'fg', folder, file) for file in os.listdir(os.path.join(dim_dataset, 'fg', folder))] for folder in ['DIM_TEST', 'DIM_TRAIN']] paths = filepaths[0] + filepaths[1] # we take VOC images as background voc_list = [os.path.join(voc_dataset, file) for file in os.listdir(voc_dataset)] dst_fg = os.path.join(sig_dataset, 'fg', 'augmented') dst_bg = os.path.join(sig_dataset, 'bg', 'augmented') for i, path in enumerate(paths): alpha, fg = reader.read_fg_img(path) name = os.path.basename(path).split('.')[0] print('Processing image {} ({}/{})'.format(name, i+1, len(paths))) bgra_ref = np.concatenate((fg, (255. * alpha.reshape((alpha.shape[0], alpha.shape[1], 1))).astype(np.uint8)), axis=2) cv2.imwrite(os.path.join(dst_fg, '{}_fg_ref.png'.format(name)), bgra_ref) for i in progressbar.progressbar(range(n)): bg_path = voc_list[np.random.randint(len(voc_list))] bg = cv2.imread(bg_path) bg = cv2.resize(bg, dsize=(fg.shape[1], fg.shape[0]), interpolation=cv2.INTER_LINEAR) nfg, nbg, nal = augment(fg, bg, alpha) augmented_fg = np.concatenate((nfg, (255. * nal.reshape((nal.shape[0], nal.shape[1], 1))).astype(np.uint8)), axis=2) cv2.imwrite(os.path.join(dst_bg, '{}_bg_ref_{:04d}.png'.format(name, i)), bg) cv2.imwrite(os.path.join(dst_bg, '{}_bg_{:04d}.png'.format(name, i)), nbg) cv2.imwrite(os.path.join(dst_fg, '{}_fg_{:04d}.png'.format(name, i)), augmented_fg)
def load_and_crop(entry, input_size):
    """Load an input/label pair from a training-list entry.

    Args:
        entry: tuple ``(fg_path, tr_path, bg_path)`` — paths to the foreground
            image (with alpha), its trimap, and the background image.
        input_size: ``(width, height)`` tuple passed to ``cv2.resize``.

    Returns:
        Tuple ``(inp, label, fg)`` where ``inp`` is the 6-channel
        concatenation of the VGG-mean-centred composite and background,
        ``label`` the alpha matte with a trailing channel axis.
    """
    fg_path, tr_path, bg_path = entry
    alpha, fg = reader.read_fg_img(fg_path)
    # np.float was deprecated in NumPy 1.20 and removed in 1.24 — use the
    # explicit float64 dtype instead.
    fg = fg.astype(dtype=np.float64)  # potentially very big
    bg = cv2.imread(bg_path).astype(dtype=np.float64)
    # Trimap is read greyscale and normalised to [0, 1].
    trimap = cv2.imread(tr_path, 0) / 255.
    trimap = trimap.reshape((trimap.shape[0], trimap.shape[1], 1))
    crop_type = [(320, 320), (480, 480), (640, 640)]
    # we crop images of different sizes
    crop_h, crop_w = crop_type[np.random.randint(0, len(crop_type))]
    fg_h, fg_w = fg.shape[:2]
    if fg_h < crop_h or fg_w < crop_w:
        # in that case the image is not too big, and we have to add padding
        alpha = alpha.reshape((alpha.shape[0], alpha.shape[1], 1))
        cat = np.concatenate((fg, alpha, trimap), axis=2)
        cropped_cat = get_padded_img(cat, crop_h, crop_w)
        # channels 0-2 -> fg, 3 -> alpha, 4 -> trimap
        fg, alpha, trimap = np.split(cropped_cat,
                                     indices_or_sections=[3, 4], axis=2)
    # otherwise, the fg is likely to be HRes: we directly crop it and dismiss
    # the original image to avoid manipulating big images
    fg_h, fg_w = fg.shape[:2]
    # randomly picks top-left corner
    i, j = np.random.randint(0, fg_h - crop_h + 1), np.random.randint(
        0, fg_w - crop_w + 1)
    # BUGFIX: the width slice previously used crop_h (j:j + crop_h); harmless
    # for the square crop_type values above, but wrong for rectangular crops.
    fg = fg[i:i + crop_h, j:j + crop_w]
    alpha = alpha[i:i + crop_h, j:j + crop_w]
    trimap = trimap[i:i + crop_h, j:j + crop_w]
    # Crop a background region with the same aspect handling as the fg crop,
    # padding the bg first so the crop always fits.
    bg_crop_h = int(np.ceil(crop_h * bg.shape[0] / fg.shape[0]))
    bg_crop_w = int(np.ceil(crop_w * bg.shape[1] / fg.shape[1]))
    padded_bg = get_padded_img(bg, bg_crop_h, bg_crop_w)
    i, j = np.random.randint(0, bg.shape[0] - bg_crop_h + 1), np.random.randint(
        0, bg.shape[1] - bg_crop_w + 1)
    cropped_bg = padded_bg[i:i + bg_crop_h, j:j + bg_crop_w]
    bg = cv2.resize(src=cropped_bg, dsize=input_size,
                    interpolation=cv2.INTER_LINEAR)
    fg = cv2.resize(fg, input_size, interpolation=cv2.INTER_LINEAR)
    alpha = cv2.resize(alpha, input_size, interpolation=cv2.INTER_LINEAR)
    trimap = cv2.resize(trimap, input_size, interpolation=cv2.INTER_LINEAR)
    cmp = reader.create_composite_image(fg, bg, alpha)
    # Centre the network inputs on the VGG training mean.
    cmp -= params.VGG_MEAN
    bg -= params.VGG_MEAN
    # NOTE(review): trimap is centred here but no longer concatenated into the
    # input (see the 6-channel `inp` below); kept in case it is re-enabled.
    trimap -= 0.5
    inp = np.concatenate((cmp, bg), axis=2)
    label = alpha.reshape((alpha.shape[0], alpha.shape[1], 1))
    return inp, label, fg
# cv2.createTrackbar('threshold', 'image', 0, 50, nothing) # while True: # excl = np.zeros((h, w), dtype=np.float) # thresh = cv2.getTrackbarPos('threshold', 'image') # excl[np.where(err > thresh)] = 1. # cv2.imshow('image', excl) # k = cv2.waitKey(1) & 0xFF # if k == 27: # break return alpha if __name__ == '__main__': flow_f = reader.read_flow('./test_data/forward.flo') flow_b = reader.read_flow('./test_data/backward.flo') alp, img = reader.read_fg_img('./test_data/in0062.png') # bgr = warp_bgr(img, flow_b) # bgr2 = warp_bgr(bgr, flow_f) h, w = img.shape[:2] alpha = warp_img(alp, flow_b) cv2.imshow('noncorr', alpha) alpha = correct_alpha(flow_b, flow_f, alpha) cv2.imshow('test', alpha) cv2.waitKey(0) # res = np.concatenate((bgr, (255.*alpha.reshape(img.shape[0], img.shape[1], 1)).astype(np.uint8)), axis=2) / 255. # res = bgr # vis = np.zeros((h, w, 3), dtype=np.float) # vis[:, :, 2] = err.astype(np.float) # vis = 0.5 * (vis + res) # cv2.imwrite('./test_data/res.png', res)