class Synthetic2Data(object):
    """Synthetic video generator: composites moving foreground objects over a
    noise background and returns frames together with ground-truth dense
    motion fields and segmentation layers."""

    def __init__(self, args):
        self.batch_size = args.batch_size
        self.num_objects = args.num_objects
        self.im_size = args.image_size
        self.im_channel = args.image_channel
        self.m_range = args.motion_range
        self.m_type = args.motion_type
        self.num_frame = args.num_frame
        self.bg_move = args.bg_move
        self.bg_noise = args.bg_noise
        self.m_dict, self.reverse_m_dict, self.m_kernel = self.motion_dict()
        self.visualizer = BaseVisualizer(args, self.reverse_m_dict)
        self.save_display = args.save_display
        self.save_display_dir = args.save_display_dir
        if args.fixed_data:
            numpy.random.seed(args.seed)

    def motion_dict(self):
        """Map every integer motion (m_x, m_y) in [-m_range, m_range]^2 to a
        class label and build one one-hot kernel per label.

        Returns:
            m_dict: {(m_x, m_y): label}.
            reverse_m_dict: {label: (m_x, m_y)}.
            m_kernel: (1, n_labels, 2*m_range+1, 2*m_range+1) one-hot kernels.
        """
        m_range = self.m_range
        m_dict, reverse_m_dict = {}, {}
        x = numpy.linspace(-m_range, m_range, 2 * m_range + 1)
        y = numpy.linspace(-m_range, m_range, 2 * m_range + 1)
        m_x, m_y = numpy.meshgrid(x, y)
        m_x, m_y = m_x.reshape(-1).astype(int), m_y.reshape(-1).astype(int)
        m_kernel = numpy.zeros((1, len(m_x), 2 * m_range + 1, 2 * m_range + 1))
        for i in range(len(m_x)):
            m_dict[(m_x[i], m_y[i])] = i
            reverse_m_dict[i] = (m_x[i], m_y[i])
            # Hot-spot at the NEGATED offset; presumably matches the warp
            # direction convention used by cv2.warpAffine below -- confirm.
            m_kernel[:, i, -m_y[i] + m_range, -m_x[i] + m_range] = 1
        return m_dict, reverse_m_dict, m_kernel

    def generate_data(self, src_image, src_mask):
        """Render a batch of synthetic sequences.

        Args:
            src_image: per-object images, (num_objects, batch, H, W, C).
            src_mask: per-object masks, (num_objects, batch, H, W, 1).

        Returns:
            im: (batch, num_frame, C, H, W) frames.
            motion: (batch, num_frame, 2, H, W) ground-truth flow.
            seg_layer: (batch, num_frame, 1, H, W) int segmentation layers.
        """
        batch_size, im_size, im_channel = self.batch_size, self.im_size, self.im_channel
        # generate foreground motion
        src_motion, m = self.generate_motion(self.num_objects)
        # move foreground
        fg_im, fg_motion, fg_mask = self.move_foreground(
            src_image, src_motion, src_mask, m)
        # generate background (uniform noise scaled by bg_noise)
        src_bg = numpy.random.rand(batch_size, im_size, im_size, im_channel) * self.bg_noise
        # generate background motion
        if self.bg_move:
            src_motion, m = self.generate_motion(num_objects=1)
        else:
            # static background: zero flow and identity affine transforms
            src_motion = numpy.zeros((1, batch_size, im_size, im_size, 2))
            m = numpy.zeros((1, batch_size, 2, 3))
            m[0, :, 0, 0] = 1
            m[0, :, 1, 1] = 1
        # move background
        bg_im, bg_motion = self.move_background(src_bg, src_motion, m)
        # merge foreground and background (image and motion) wherever no
        # foreground object covers the pixel
        mask = fg_mask == 0
        motion_mask = numpy.concatenate((mask, mask), 4)
        fg_motion[motion_mask] = bg_motion[motion_mask]
        if self.im_channel == 1:
            im_mask = mask
        elif self.im_channel == 3:
            im_mask = numpy.concatenate((mask, mask, mask), 4)
        fg_im[im_mask] = bg_im[im_mask]
        # (frame, batch, H, W, C) -> (batch, frame, C, H, W)
        im = fg_im.swapaxes(0, 1).swapaxes(3, 4).swapaxes(2, 3)
        motion = fg_motion.swapaxes(0, 1).swapaxes(3, 4).swapaxes(2, 3)
        seg_layer = fg_mask.swapaxes(0, 1).swapaxes(3, 4).swapaxes(2, 3)
        return im, motion, seg_layer.astype(int)

    def generate_motion(self, num_objects):
        """Sample a 2x3 transform per object / batch item plus the dense
        per-pixel motion it induces.

        Returns:
            src_motion: (num_objects, batch, H, W, 2) dense flow.
            m: (num_objects, batch, 2, 3) transform matrices.
        """
        batch_size, im_size, m_range = self.batch_size, self.im_size, self.m_range
        m_type = self.m_type
        m = numpy.zeros((num_objects, batch_size, 2, 3))
        if m_type == 'translation_discrete':
            # BUG FIX: numpy.random.randint's upper bound is exclusive, so the
            # original (-m_range, m_range) could never sample +m_range even
            # though the motion dictionary includes it; use m_range + 1.
            t_x = numpy.random.randint(-m_range, m_range + 1,
                                       size=(num_objects, batch_size))
            t_y = numpy.random.randint(-m_range, m_range + 1,
                                       size=(num_objects, batch_size))
            m[:, :, 0, 0] = 1
            m[:, :, 1, 1] = 1
            m[:, :, 0, 2] = t_x
            m[:, :, 1, 2] = t_y
        elif m_type == 'translation':
            t_x = numpy.random.uniform(-m_range, m_range, size=(num_objects, batch_size))
            t_y = numpy.random.uniform(-m_range, m_range, size=(num_objects, batch_size))
            m[:, :, 0, 0] = 1
            m[:, :, 1, 1] = 1
            m[:, :, 0, 2] = t_x
            m[:, :, 1, 2] = t_y
        elif m_type == 'rotation':
            # cap the angle so border pixels move at most roughly m_range px
            max_angle = m_range * 1.0 / im_size * 180 / math.pi * 1.8
            angle = numpy.random.uniform(-max_angle, max_angle, size=(num_objects, batch_size))
            for i in range(num_objects):
                for j in range(batch_size):
                    m[i, j, :, :] = cv2.getRotationMatrix2D(
                        (im_size / 2, im_size / 2), angle[i, j], 1)
        elif m_type == 'affine':
            # anchor one of four corner triangles, then jitter it
            rand_idx = numpy.random.randint(0, 4)
            if rand_idx == 0:
                pts1 = numpy.float32([[0, 0], [im_size, 0], [im_size, im_size]])
            elif rand_idx == 1:
                pts1 = numpy.float32([[im_size, 0], [im_size, im_size], [0, im_size]])
            elif rand_idx == 2:
                pts1 = numpy.float32([[im_size, im_size], [0, im_size], [0, 0]])
            elif rand_idx == 3:
                pts1 = numpy.float32([[0, im_size], [0, 0], [im_size, 0]])
            max_shift = m_range * 0.33
            for i in range(num_objects):
                for j in range(batch_size):
                    shift = numpy.random.uniform(-max_shift, max_shift, size=(3, 2))
                    pts2 = pts1 + shift.astype(numpy.float32)
                    m[i, j, :, :] = cv2.getAffineTransform(pts1, pts2)
        elif m_type == 'perspective':
            # NOTE(review): getPerspectiveTransform returns a 3x3 matrix that
            # cannot fit the (2, 3) slots of m; this branch is declared
            # unimplemented and will fail if exercised.
            logging.error('motion generation for perspective transformation is not implemented')
            pts1 = numpy.float32([[0, 0], [im_size, 0], [0, im_size], [im_size, im_size]])
            for i in range(num_objects):
                for j in range(batch_size):
                    shift = numpy.random.uniform(-m_range, m_range, size=(4, 2))
                    pts2 = pts1 + shift.astype(numpy.float32)
                    m[i, j, :, :] = cv2.getPerspectiveTransform(pts1, pts2)
        # dense ground truth: transform the pixel grid and subtract it
        src_motion = numpy.zeros((num_objects, batch_size, im_size, im_size, 2))
        pts = numpy.mgrid[0:im_size, 0:im_size].reshape(2, -1)
        pts = pts[::-1, :]  # (row, col) -> (x, y)
        pts = numpy.concatenate((pts, numpy.ones((1, pts.shape[1]))), 0)
        for i in range(num_objects):
            for j in range(batch_size):
                if m_type == 'perspective':
                    logging.error('ground truth for perspective transformation is not implemented')
                motion = numpy.dot(m[i, j, :, :], pts) - pts[:2, :]
                src_motion[i, j, :, :, :] = motion.swapaxes(0, 1).reshape(im_size, im_size, 2)
        return src_motion, m

    def move_foreground(self, src_image, src_motion, src_mask, m):
        """Composite one frame per time step, warping the per-object layers
        between steps. Returns (im, motion, mask) with a leading frame axis."""
        batch_size, num_frame, im_size = self.batch_size, self.num_frame, self.im_size
        im = numpy.zeros((num_frame, batch_size, im_size, im_size, self.im_channel))
        motion = numpy.zeros((num_frame, batch_size, im_size, im_size, 2))
        mask = numpy.zeros((num_frame, batch_size, im_size, im_size, 1))
        for i in range(num_frame):
            im[i, ...], motion[i, ...], mask[i, ...] = \
                self.merge_objects(src_image, src_motion, src_mask)
            src_image = self.move_image_fg(src_image, m)
            src_mask = self.move_mask(src_mask, m)
        return im, motion, mask

    def merge_objects(self, src_image, src_motion, src_mask):
        """Composite the per-object layers front-to-back: earlier objects
        occlude later ones (pixels are only filled where still empty)."""
        batch_size, num_objects, im_size = self.batch_size, self.num_objects, self.im_size
        im = numpy.zeros((batch_size, im_size, im_size, self.im_channel))
        motion = numpy.zeros((batch_size, im_size, im_size, 2))
        mask = numpy.zeros((batch_size, im_size, im_size, 1))
        for i in range(num_objects):
            zero_mask = mask == 0
            zero_motion_mask = numpy.concatenate((zero_mask, zero_mask), 3)
            if self.im_channel == 1:
                zero_im_mask = zero_mask
            elif self.im_channel == 3:
                zero_im_mask = numpy.concatenate((zero_mask, zero_mask, zero_mask), 3)
            im[zero_im_mask] = src_image[i, zero_im_mask]
            motion[zero_motion_mask] = src_motion[i, zero_motion_mask]
            # depth-code the layer id so nearer objects get larger values
            mask[zero_mask] = src_mask[i, zero_mask] * (num_objects - i)
        return im, motion, mask

    def move_image_fg(self, im, m):
        """Warp every object image by its transform m[i, j]."""
        batch_size, num_objects, im_size = self.batch_size, self.num_objects, self.im_size
        m_type = self.m_type
        im_new = numpy.zeros((num_objects, batch_size, im_size, im_size, self.im_channel))
        for i in range(num_objects):
            for j in range(batch_size):
                curr_im = im[i, j, :, :, :].squeeze()
                if m_type in ['translation', 'translation_discrete', 'rotation', 'affine']:
                    if self.im_channel == 1:
                        im_new[i, j, :, :, 0] = cv2.warpAffine(
                            curr_im, m[i, j, :, :], (im_size, im_size))
                    elif self.im_channel == 3:
                        im_new[i, j, ...] = cv2.warpAffine(
                            curr_im, m[i, j, :, :], (im_size, im_size))
                elif m_type == 'perspective':
                    # BUG FIX: the single-channel perspective path called
                    # cv2.warpAffine, which expects a 2x3 matrix; perspective
                    # transforms are 3x3 and require cv2.warpPerspective, as
                    # the 3-channel path and move_image_bg already do.
                    if self.im_channel == 1:
                        im_new[i, j, :, :, 0] = cv2.warpPerspective(
                            curr_im, m[i, j, :, :], (im_size, im_size))
                    elif self.im_channel == 3:
                        im_new[i, j, ...] = cv2.warpPerspective(
                            curr_im, m[i, j, :, :], (im_size, im_size))
        return im_new

    def move_mask(self, mask, m):
        """Warp every object mask by its transform and re-binarize."""
        batch_size, num_objects, im_size = self.batch_size, self.num_objects, self.im_size
        m_type = self.m_type
        mask_new = numpy.zeros((num_objects, batch_size, im_size, im_size, 1))
        for i in range(num_objects):
            for j in range(batch_size):
                curr_mask = mask[i, j, :, :, :].squeeze()
                if m_type in ['translation', 'translation_discrete', 'rotation', 'affine']:
                    mask_new[i, j, :, :, 0] = cv2.warpAffine(
                        curr_mask, m[i, j, :, :], (im_size, im_size))
                elif m_type == 'perspective':
                    mask_new[i, j, :, :, 0] = cv2.warpPerspective(
                        curr_mask, m[i, j, :, :], (im_size, im_size))
        # interpolation leaves fractional values at warped borders
        mask_new = numpy.round(mask_new)
        return mask_new

    def move_background(self, src_image, src_motion, m):
        """Warp the background once per frame; its flow is constant."""
        batch_size, num_frame, im_size = self.batch_size, self.num_frame, self.im_size
        im = numpy.zeros((num_frame, batch_size, im_size, im_size, self.im_channel))
        motion = numpy.zeros((num_frame, batch_size, im_size, im_size, 2))
        for i in range(num_frame):
            im[i, :, :, :, :] = src_image
            src_image = self.move_image_bg(src_image, m)
            motion[i, :, :, :, :] = src_motion
        return im, motion

    def move_image_bg(self, bg_im, m):
        """Warp the background image by the (single) background transform."""
        batch_size, im_size = self.batch_size, self.im_size
        m_type = self.m_type
        im_new = numpy.zeros((batch_size, im_size, im_size, self.im_channel))
        for i in range(batch_size):
            curr_im = bg_im[i, :, :, :].squeeze()
            if m_type in ['translation', 'translation_discrete', 'rotation', 'affine']:
                if self.im_channel == 1:
                    im_new[i, :, :, 0] = cv2.warpAffine(curr_im, m[0, i, :, :], (im_size, im_size))
                elif self.im_channel == 3:
                    im_new[i, ...] = cv2.warpAffine(curr_im, m[0, i, :, :], (im_size, im_size))
            elif m_type == 'perspective':
                if self.im_channel == 1:
                    im_new[i, :, :, 0] = cv2.warpPerspective(curr_im, m[0, i, :, :], (im_size, im_size))
                elif self.im_channel == 3:
                    im_new[i, ...] = cv2.warpPerspective(curr_im, m[0, i, :, :], (im_size, im_size))
        return im_new

    def display(self, im, motion, seg_layer):
        """Visualize the first sequence as a 4-row grid (frames, frame diffs,
        flow, segmentation); save to disk or show, per args."""
        num_frame = self.num_frame
        width, height = self.visualizer.get_img_size(4, num_frame)
        img = numpy.ones((height, width, 3))
        prev_im = None
        for i in range(num_frame):
            curr_im = im[0, i, :, :, :].transpose(1, 2, 0)
            x1, y1, x2, y2 = self.visualizer.get_img_coordinate(1, i + 1)
            img[y1:y2, x1:x2, :] = curr_im
            if i > 0:
                im_diff = abs(curr_im - prev_im)
                x1, y1, x2, y2 = self.visualizer.get_img_coordinate(2, i + 1)
                img[y1:y2, x1:x2, :] = im_diff
            prev_im = curr_im
            flow = motion[0, i, :, :, :].transpose(1, 2, 0)
            optical_flow = flowlib.visualize_flow(flow, self.m_range)
            x1, y1, x2, y2 = self.visualizer.get_img_coordinate(3, i + 1)
            img[y1:y2, x1:x2, :] = optical_flow / 255.0
            seg_max = seg_layer[0, i, :, :, :].max()
            seg = seg_layer[0, i, :, :, :].squeeze() * 1.0 / seg_max
            cmap = plt.get_cmap('jet')
            seg_map = cmap(seg)[:, :, 0:3]
            x1, y1, x2, y2 = self.visualizer.get_img_coordinate(4, i + 1)
            img[y1:y2, x1:x2, :] = seg_map
        if self.save_display:
            img = img * 255.0
            img = img.astype(numpy.uint8)
            img = Image.fromarray(img)
            img.save(os.path.join(self.save_display_dir, 'data.png'))
        else:
            plt.figure(1)
            plt.imshow(img)
            plt.axis('off')
            plt.show()
class Synthetic2Data(object):
    """Discrete-translation variant of the synthetic data generator: every
    object (and optionally the background) moves by an integer (m_x, m_y)
    drawn from the motion dictionary, and per-pixel motion class labels are
    produced alongside the dense flow.

    NOTE(review): this redefines the Synthetic2Data declared earlier in the
    file and shadows it at import time -- confirm that is intended.
    """

    def __init__(self, args):
        self.batch_size = args.batch_size
        self.num_objects = args.num_objects
        self.im_size = args.image_size
        self.im_channel = args.image_channel
        self.m_range = args.motion_range
        self.num_frame = args.num_frame
        self.bg_move = args.bg_move
        self.m_dict, self.reverse_m_dict, self.m_kernel = self.motion_dict()
        self.visualizer = BaseVisualizer(args, self.reverse_m_dict)
        self.save_display = args.save_display
        self.save_display_dir = args.save_display_dir
        if args.fixed_data:
            numpy.random.seed(args.seed)

    def motion_dict(self):
        """Build motion<->label lookup tables plus one-hot kernels; here the
        kernel hot-spot is at the POSITIVE offset (+m_y, +m_x), unlike the
        other generators in this file."""
        m_range = self.m_range
        m_dict, reverse_m_dict = {}, {}
        x = numpy.linspace(-m_range, m_range, 2 * m_range + 1)
        y = numpy.linspace(-m_range, m_range, 2 * m_range + 1)
        m_x, m_y = numpy.meshgrid(x, y)
        m_x, m_y = m_x.reshape(-1).astype(int), m_y.reshape(-1).astype(int)
        m_kernel = numpy.zeros((1, len(m_x), 2 * m_range + 1, 2 * m_range + 1))
        for i in range(len(m_x)):
            m_dict[(m_x[i], m_y[i])] = i
            reverse_m_dict[i] = (m_x[i], m_y[i])
            m_kernel[:, i, m_y[i] + m_range, m_x[i] + m_range] = 1
        return m_dict, reverse_m_dict, m_kernel

    def generate_data(self, src_fg, src_mask, src_bg):
        """Render a batch of sequences with per-pixel motion labels.

        Returns:
            im: (batch, num_frame, C, H, W) frames.
            motion: (batch, num_frame, 2, H, W) int flow.
            motion_label: (batch, num_frame, 1, H, W) int labels.
            seg_layer: (batch, num_frame, 1, H, W) int segmentation.
        """
        batch_size, im_size, im_channel = self.batch_size, self.im_size, self.im_channel
        m_dict = self.m_dict
        # generate foreground motion
        src_motion, src_motion_label, m_x, m_y = self.generate_motion(
            self.num_objects, src_mask)
        # move foreground
        fg_im, fg_motion, fg_motion_label, fg_mask = \
            self.move_foreground(src_fg, src_motion, src_motion_label,
                                 src_mask, m_x, m_y)
        # generate background motion
        if self.bg_move:
            bg_motion, bg_motion_label, m_x, m_y = self.generate_motion(
                num_objects=1)
        else:
            # static background: zero flow and the zero-motion label
            bg_motion = numpy.zeros((1, batch_size, 2, im_size, im_size))
            bg_motion_label = m_dict[(0, 0)] * numpy.ones(
                (1, batch_size, 1, im_size, im_size))
            m_x = numpy.zeros((1, batch_size)).astype(int)
            m_y = numpy.zeros((1, batch_size)).astype(int)
        # move background
        bg_im, bg_motion, bg_motion_label = \
            self.move_background(src_bg, bg_motion, bg_motion_label, m_x, m_y)
        # merge foreground and background wherever no object covers the pixel
        mask = fg_mask == 0
        fg_motion_label[mask] = bg_motion_label[mask]
        motion_mask = numpy.concatenate((mask, mask), 2)
        fg_motion[motion_mask] = bg_motion[motion_mask]
        if self.im_channel == 1:
            im_mask = mask
        elif self.im_channel == 3:
            im_mask = numpy.concatenate((mask, mask, mask), 2)
        fg_im[im_mask] = bg_im[im_mask]
        # swap axes between batch size and frame
        im, motion, motion_label, seg_layer = \
            fg_im.swapaxes(0, 1), fg_motion.swapaxes(0, 1), \
            fg_motion_label.swapaxes(0, 1), fg_mask.swapaxes(0, 1)
        return im, motion.astype(int), motion_label.astype(
            int), seg_layer.astype(int)

    def generate_motion(self, num_objects, src_mask=None):
        """Sample one discrete motion label per object / batch item and expand
        it into dense flow and dense label maps restricted to the object mask.

        Returns (src_motion, src_motion_label, m_x, m_y).
        """
        batch_size, im_size = self.batch_size, self.im_size
        m_dict, reverse_m_dict = self.m_dict, self.reverse_m_dict
        m_label = numpy.random.randint(0, len(m_dict), size=(num_objects, batch_size))
        m_x = numpy.zeros((num_objects, batch_size)).astype(int)
        m_y = numpy.zeros((num_objects, batch_size)).astype(int)
        for i in range(num_objects):
            for j in range(batch_size):
                (m_x[i, j], m_y[i, j]) = reverse_m_dict[m_label[i, j]]
        src_motion = numpy.zeros(
            (num_objects, batch_size, 2, im_size, im_size))
        # default label everywhere is "zero motion"
        src_motion_label = m_dict[(0, 0)] * numpy.ones(
            (num_objects, batch_size, 1, im_size, im_size))
        if src_mask is None:
            src_mask = numpy.ones(
                (num_objects, batch_size, 1, im_size, im_size))
        for i in range(num_objects):
            for j in range(batch_size):
                mask = src_mask[i, j, 0, :, :] > 0
                src_motion[i, j, 0, mask] = m_x[i, j]
                src_motion[i, j, 1, mask] = m_y[i, j]
                src_motion_label[i, j, 0, mask] = m_label[i, j]
        return src_motion, src_motion_label, m_x, m_y

    def move_foreground(self, src_image, src_motion, src_motion_label,
                        src_mask, m_x, m_y):
        """Composite one frame per time step, shifting the per-object layers
        by (m_x, m_y) between steps."""
        batch_size, num_frame, im_size = self.batch_size, self.num_frame, self.im_size
        im = numpy.zeros(
            (num_frame, batch_size, self.im_channel, im_size, im_size))
        motion = numpy.zeros((num_frame, batch_size, 2, im_size, im_size))
        motion_label = numpy.zeros(
            (num_frame, batch_size, 1, im_size, im_size))
        mask = numpy.zeros((num_frame, batch_size, 1, im_size, im_size))
        for i in range(num_frame):
            im[i, ...], motion[i, ...], motion_label[i, ...], mask[i, ...] = \
                self.merge_objects(src_image, src_motion, src_motion_label,
                                   src_mask)
            src_image = self.move_image_fg(src_image, m_x, m_y)
            src_motion = self.move_motion(src_motion, m_x, m_y)
            src_motion_label = self.move_motion_label(src_motion_label, m_x, m_y)
            src_mask = self.move_mask(src_mask, m_x, m_y)
        return im, motion, motion_label, mask

    def merge_objects(self, src_image, src_motion, src_motion_label, src_mask):
        """Composite per-object layers front-to-back: earlier objects occlude
        later ones (pixels are only filled where still empty)."""
        batch_size, num_objects, im_size = self.batch_size, self.num_objects, self.im_size
        im = numpy.zeros((batch_size, self.im_channel, im_size, im_size))
        motion = numpy.zeros((batch_size, 2, im_size, im_size))
        motion_label = numpy.zeros((batch_size, 1, im_size, im_size))
        mask = numpy.zeros((batch_size, 1, im_size, im_size))
        for i in range(num_objects):
            zero_mask = mask == 0
            zero_motion_mask = numpy.concatenate((zero_mask, zero_mask), 1)
            if self.im_channel == 1:
                zero_im_mask = zero_mask
            elif self.im_channel == 3:
                zero_im_mask = numpy.concatenate(
                    (zero_mask, zero_mask, zero_mask), 1)
            im[zero_im_mask] = src_image[i, zero_im_mask]
            motion[zero_motion_mask] = src_motion[i, zero_motion_mask]
            motion_label[zero_mask] = src_motion_label[i, zero_mask]
            mask[zero_mask] = src_mask[i, zero_mask]
        return im, motion, motion_label, mask

    def move_image_fg(self, im, m_x, m_y):
        """Shift every object image by its own (m_x, m_y), padding with zeros
        (black) at the revealed border."""
        batch_size, im_size, im_channel = self.batch_size, self.im_size, self.im_channel
        m_range, num_objects = self.m_range, self.num_objects
        im_big = numpy.zeros((num_objects, batch_size, im_channel,
                              im_size + m_range * 2, im_size + m_range * 2))
        im_big[:, :, :, m_range:-m_range, m_range:-m_range] = im
        im_new = numpy.zeros(
            (num_objects, batch_size, im_channel, im_size, im_size))
        for i in range(num_objects):
            for j in range(batch_size):
                x = m_range + m_x[i, j]
                y = m_range + m_y[i, j]
                im_new[i, j, :, :, :] = im_big[i, j, :, y:y + im_size,
                                               x:x + im_size]
        return im_new

    def move_motion(self, motion, m_x, m_y):
        """Shift per-object dense motion maps by their own (m_x, m_y).

        BUG FIX: revealed border pixels are now padded with zero MOTION.
        The original padded with m_dict[(0, 0)] -- a label index, not a
        motion value -- which appears to have been swapped with the padding
        in move_motion_label.
        """
        batch_size, im_size, m_range = self.batch_size, self.im_size, self.m_range
        num_objects = self.num_objects
        motion_big = numpy.zeros(
            (num_objects, batch_size, 2, im_size + m_range * 2,
             im_size + m_range * 2))
        motion_big[:, :, :, m_range:-m_range, m_range:-m_range] = motion
        motion_new = numpy.zeros(
            (num_objects, batch_size, 2, im_size, im_size))
        for i in range(num_objects):
            for j in range(batch_size):
                x = m_range + m_x[i, j]
                y = m_range + m_y[i, j]
                motion_new[i, j, :, :, :] = motion_big[i, j, :, y:y + im_size,
                                                       x:x + im_size]
        return motion_new

    def move_motion_label(self, motion_label, m_x, m_y):
        """Shift per-object dense motion-label maps by their own (m_x, m_y).

        BUG FIX: revealed border pixels are now padded with the zero-motion
        LABEL m_dict[(0, 0)]. The original padded with 0, which is a real
        motion label; the padding appears to have been swapped with
        move_motion's.
        """
        batch_size, im_size, m_range = self.batch_size, self.im_size, self.m_range
        num_objects = self.num_objects
        motion_label_big = self.m_dict[(0, 0)] * numpy.ones(
            (num_objects, batch_size, 1, im_size + m_range * 2,
             im_size + m_range * 2))
        motion_label_big[:, :, :, m_range:-m_range, m_range:-m_range] = motion_label
        motion_label_new = numpy.zeros(
            (num_objects, batch_size, 1, im_size, im_size))
        for i in range(num_objects):
            for j in range(batch_size):
                x = m_range + m_x[i, j]
                y = m_range + m_y[i, j]
                motion_label_new[i, j, :, :, :] = \
                    motion_label_big[i, j, :, y:y + im_size, x:x + im_size]
        return motion_label_new

    def move_mask(self, mask, m_x, m_y):
        """Shift per-object masks by their own (m_x, m_y), zero-padded."""
        batch_size, im_size, m_range = self.batch_size, self.im_size, self.m_range
        num_objects = self.num_objects
        mask_big = numpy.zeros((num_objects, batch_size, 1,
                                im_size + m_range * 2, im_size + m_range * 2))
        mask_big[:, :, :, m_range:-m_range, m_range:-m_range] = mask
        mask_new = numpy.zeros((num_objects, batch_size, 1, im_size, im_size))
        for i in range(num_objects):
            for j in range(batch_size):
                x = m_range + m_x[i, j]
                y = m_range + m_y[i, j]
                mask_new[i, j, :, :, :] = mask_big[i, j, :, y:y + im_size,
                                                   x:x + im_size]
        return mask_new

    def move_background(self, src_bg, src_motion, src_motion_label, m_x, m_y):
        """Slide an im_size crop window across the (larger) background image
        by the background motion, one step per frame."""
        batch_size, num_frame, im_size = self.batch_size, self.num_frame, self.im_size
        im = numpy.zeros(
            (num_frame, batch_size, self.im_channel, im_size, im_size))
        motion = numpy.zeros((num_frame, batch_size, 2, im_size, im_size))
        motion_label = numpy.zeros(
            (num_frame, batch_size, 1, im_size, im_size))
        height, width = src_bg.shape[2], src_bg.shape[3]
        # start the crop window at the center of the background image;
        # BUG FIX: numpy.int was removed in NumPy >= 1.24, and // keeps the
        # slice bounds integral under Python 3 (behavior identical on Py2).
        xc, yc = numpy.ones((batch_size)) * width // 2, numpy.ones(
            (batch_size)) * height // 2
        xc, yc = xc.astype(int), yc.astype(int)
        for i in range(num_frame):
            x1, y1 = xc - im_size // 2, yc - im_size // 2
            x2, y2 = xc + im_size // 2, yc + im_size // 2
            for j in range(batch_size):
                im[i, j, :, :, :] = src_bg[j, :, y1[j]:y2[j], x1[j]:x2[j]]
            motion[i, :, :, :, :] = src_motion
            motion_label[i, :, :, :, :] = src_motion_label
            xc, yc = xc + m_x[0, :], yc + m_y[0, :]
        return im, motion, motion_label

    def display(self, im, motion, seg_layer):
        """Visualize the first sequence as a 4-row grid (frames, frame diffs,
        flow, segmentation); save to disk or show, per args."""
        num_frame = self.num_frame
        width, height = self.visualizer.get_img_size(4, num_frame)
        img = numpy.ones((height, width, 3))
        prev_im = None
        for i in range(num_frame):
            curr_im = im[0, i, :, :, :].transpose(1, 2, 0)
            x1, y1, x2, y2 = self.visualizer.get_img_coordinate(1, i + 1)
            img[y1:y2, x1:x2, :] = curr_im
            if i > 0:
                im_diff = abs(curr_im - prev_im)
                x1, y1, x2, y2 = self.visualizer.get_img_coordinate(2, i + 1)
                img[y1:y2, x1:x2, :] = im_diff
            prev_im = curr_im
            flow = motion[0, i, :, :, :].transpose(1, 2, 0)
            optical_flow = flowlib.visualize_flow(flow, self.m_range)
            x1, y1, x2, y2 = self.visualizer.get_img_coordinate(3, i + 1)
            img[y1:y2, x1:x2, :] = optical_flow / 255.0
            seg_max = seg_layer[0, i, :, :, :].max()
            seg = seg_layer[0, i, :, :, :].squeeze() * 1.0 / seg_max
            cmap = plt.get_cmap('jet')
            seg_map = cmap(seg)[:, :, 0:3]
            x1, y1, x2, y2 = self.visualizer.get_img_coordinate(4, i + 1)
            img[y1:y2, x1:x2, :] = seg_map
        if self.save_display:
            img = img * 255.0
            img = img.astype(numpy.uint8)
            img = Image.fromarray(img)
            img.save(os.path.join(self.save_display_dir, 'data.png'))
        else:
            plt.figure(1)
            plt.imshow(img)
            plt.axis('off')
            plt.show()
class RealData(object):
    """Loads consecutive real video frames from a two-level image directory
    tree and produces randomly-cropped training sequences, filtered by
    inter-frame difference statistics."""

    def __init__(self, args):
        self.batch_size = args.batch_size
        self.im_size = args.image_size
        self.im_channel = args.image_channel
        self.m_range = args.motion_range
        self.num_frame = args.num_frame
        self.m_dict, self.reverse_m_dict, self.m_kernel = self.motion_dict()
        self.visualizer = BaseVisualizer(args, self.reverse_m_dict)
        self.save_display = args.save_display
        self.save_display_dir = args.save_display_dir
        self.min_diff_thresh = args.min_diff_thresh
        self.max_diff_thresh = args.max_diff_thresh
        self.diff_div_thresh = args.diff_div_thresh
        self.fixed_data = args.fixed_data
        if args.fixed_data:
            numpy.random.seed(args.seed)
        self.rand_noise = args.rand_noise
        self.augment_reverse = args.augment_reverse

    def motion_dict(self):
        """Map every integer motion (m_x, m_y) in [-m_range, m_range]^2 to a
        class label and build one one-hot kernel per label (hot-spot at the
        negated offset, matching the first generator in this file)."""
        m_range = self.m_range
        m_dict, reverse_m_dict = {}, {}
        x = numpy.linspace(-m_range, m_range, 2 * m_range + 1)
        y = numpy.linspace(-m_range, m_range, 2 * m_range + 1)
        m_x, m_y = numpy.meshgrid(x, y)
        m_x, m_y = m_x.reshape(-1).astype(int), m_y.reshape(-1).astype(int)
        m_kernel = numpy.zeros((1, len(m_x), 2 * m_range + 1, 2 * m_range + 1))
        for i in range(len(m_x)):
            m_dict[(m_x[i], m_y[i])] = i
            reverse_m_dict[i] = (m_x[i], m_y[i])
            m_kernel[:, i, -m_y[i] + m_range, -m_x[i] + m_range] = 1
        return m_dict, reverse_m_dict, m_kernel

    def get_meta(self, image_dir):
        """Index every run of num_frame consecutive images found under
        image_dir/<sub>/<sub_sub>/ as one sample.

        Returns:
            {sample_id: [num_frame image paths]}.
        """
        meta, cnt = {}, 0
        for sub_dir in os.listdir(image_dir):
            for sub_sub_dir in os.listdir(os.path.join(image_dir, sub_dir)):
                image_files = os.listdir(os.path.join(image_dir, sub_dir, sub_sub_dir))
                # BUG FIX: sort numerically by the digits in the file name.
                # ''.join makes this work on Python 3, where filter() returns
                # an iterator and int(filter(...)) raised TypeError; behavior
                # on Python 2 is unchanged.
                image_files.sort(key=lambda f: int(''.join(filter(str.isdigit, f))))
                image_names = [os.path.join(image_dir, sub_dir, sub_sub_dir, f)
                               for f in image_files]
                num_images = len(image_names)
                if num_images < self.num_frame:
                    continue
                for i in range(num_images - self.num_frame + 1):
                    meta[cnt] = image_names[i:i + self.num_frame]
                    cnt += 1
        return meta

    def generate_data(self, meta):
        """Sample a batch of cropped sequences from meta, rejecting sequences
        whose inter-frame difference is too small, too large, or too uneven,
        then add uniform noise and clip to [0, 1].

        Returns:
            im: (batch, num_frame, C, im_size, im_size) array in [0, 1].
        """
        batch_size, im_size, num_frame = self.batch_size, self.im_size, self.num_frame
        im_channel = self.im_channel
        min_diff_thresh, max_diff_thresh = self.min_diff_thresh, self.max_diff_thresh
        diff_div_thresh = self.diff_div_thresh
        idx = numpy.random.permutation(len(meta))
        im = numpy.zeros((batch_size, num_frame, im_channel, im_size, im_size))
        i, cnt = 0, 0
        while i < batch_size:
            # fail loudly instead of an opaque IndexError when the rejection
            # thresholds filter out every candidate sequence
            if cnt >= len(idx):
                raise RuntimeError('exhausted meta without filling the batch; '
                                   'check the diff thresholds')
            image_names = meta[idx[cnt]]
            for j in range(len(image_names)):
                if im_channel == 1:
                    image = numpy.array(Image.open(image_names[j]).convert('L'))
                    image = image / 255.0
                    # BUG FIX: append the channel axis with axis=2; axis=3 is
                    # out of range for a 2-D array on modern NumPy (older
                    # NumPy silently clipped it to the same effect).
                    image = numpy.expand_dims(image, 2)
                elif im_channel == 3:
                    image = numpy.array(Image.open(image_names[j]))
                    image = image / 255.0
                if j == 0:
                    # one random crop position shared by all frames
                    height, width = image.shape[0], image.shape[1]
                    idx_h = numpy.random.randint(0, height + 1 - im_size)
                    idx_w = numpy.random.randint(0, width + 1 - im_size)
                image = image.transpose((2, 0, 1))
                im[i, j, :, :, :] = image[:, idx_h:idx_h + im_size,
                                          idx_w:idx_w + im_size]
            cnt = cnt + 1
            # mean absolute per-pixel difference between consecutive frames
            im_diff = numpy.zeros((num_frame - 1))
            for j in range(num_frame - 1):
                diff = numpy.abs(im[i, j, :, :, :] - im[i, j + 1, :, :, :])
                im_diff[j] = numpy.sum(diff) / im_channel / im_size / im_size
            if any(im_diff < min_diff_thresh) or any(im_diff > max_diff_thresh):
                continue
            if num_frame > 2:
                # reject sequences whose motion is very uneven across frames
                im_diff_div = im_diff / (numpy.median(im_diff) + 1e-5)
                if any(im_diff_div > diff_div_thresh) or any(im_diff_div < 1 / diff_div_thresh):
                    continue
            if self.augment_reverse:
                if numpy.random.rand() < 0.5:
                    im[i, :, :, :, :] = im[i, ::-1, :, :, :]
            i = i + 1
        im = im + (numpy.random.rand(batch_size, num_frame, im_channel,
                                     im_size, im_size) - 0.5) * self.rand_noise
        im[im > 1] = 1
        im[im < 0] = 0
        return im

    def display(self, im):
        """Visualize the first sequence as a 2-row grid (frames and frame
        diffs); save to disk or show, per args."""
        num_frame, im_channel = self.num_frame, self.im_channel
        width, height = self.visualizer.get_img_size(2, num_frame)
        img = numpy.ones((height, width, 3))
        prev_im = None
        for i in range(num_frame):
            curr_im = im[0, i, :, :, :].transpose(1, 2, 0)
            x1, y1, x2, y2 = self.visualizer.get_img_coordinate(1, i + 1)
            img[y1:y2, x1:x2, :] = curr_im
            if i > 0:
                im_diff = abs(curr_im - prev_im)
                x1, y1, x2, y2 = self.visualizer.get_img_coordinate(2, i + 1)
                img[y1:y2, x1:x2, :] = im_diff
            prev_im = curr_im
        if self.save_display:
            img = img * 255.0
            img = img.astype(numpy.uint8)
            img = Image.fromarray(img)
            img.save(os.path.join(self.save_display_dir, 'data.png'))
        else:
            plt.figure(1)
            plt.imshow(img)
            plt.axis('off')
            plt.show()
class SyntheticDataBidirect(object): def __init__(self, args): self.batch_size = args.batch_size self.num_objects = args.num_objects self.im_size = args.image_size self.m_range = args.motion_range self.num_frame = args.num_frame self.num_frame_one_direction = (self.num_frame + 1) / 2 self.bg_move = args.bg_move self.bg_noise = args.bg_noise self.m_dict, self.reverse_m_dict, self.m_kernel = self.motion_dict() self.visualizer = BaseVisualizer(args, self.reverse_m_dict) self.save_display = args.save_display self.save_display_dir = args.save_display_dir def motion_dict(self): m_range = self.m_range m_dict, reverse_m_dict = {}, {} x = numpy.linspace(-m_range, m_range, 2 * m_range + 1) y = numpy.linspace(-m_range, m_range, 2 * m_range + 1) m_x, m_y = numpy.meshgrid(x, y) m_x, m_y, = m_x.reshape(-1).astype(int), m_y.reshape(-1).astype(int) m_kernel = numpy.zeros((1, len(m_x), 2 * m_range + 1, 2 * m_range + 1)) for i in range(len(m_x)): m_dict[(m_x[i], m_y[i])] = i reverse_m_dict[i] = (m_x[i], m_y[i]) m_kernel[:, i, m_y[i] + m_range, m_x[i] + m_range] = 1 return m_dict, reverse_m_dict, m_kernel def generate_data(self, src_image, src_mask): batch_size, im_size = self.batch_size, self.im_size m_dict = self.m_dict # generate forward foreground motion src_motion_f, src_motion_label_f, m_x_f, m_y_f, src_motion_b, src_motion_label_b, m_x_b, m_y_b = \ self.generate_motion(self.num_objects, src_mask) # move foreground forward fg_im_f, fg_motion_f, fg_motion_f_r, fg_motion_label_f, fg_motion_label_f_r, fg_mask_f = \ self.move_foreground(src_image, src_motion_f, src_motion_b, src_motion_label_f, src_motion_label_b, src_mask, m_x_f, m_y_f) # move foreground backward fg_im_b, fg_motion_b, fg_motion_b_r, fg_motion_label_b, fg_motion_label_b_r, fg_mask_b = \ self.move_foreground(src_image, src_motion_b, src_motion_f, src_motion_label_b, src_motion_label_f, src_mask, m_x_b, m_y_b) # generate background src_bg = numpy.random.rand(batch_size, 3, im_size, im_size) * self.bg_noise # generate 
background motion if self.bg_move: src_motion_f, src_motion_label_f, m_x_f, m_y_f, src_motion_b, src_motion_label_b, m_x_b, m_y_b = self.generate_motion( 1) else: src_motion_f = numpy.zeros((1, batch_size, 2, im_size, im_size)) src_motion_label_f = m_dict[(0, 0)] * numpy.ones( (1, batch_size, 1, im_size, im_size)) m_x_f = numpy.zeros((1, batch_size)).astype(int) m_y_f = numpy.zeros((1, batch_size)).astype(int) src_motion_b = numpy.zeros((1, batch_size, 2, im_size, im_size)) src_motion_label_b = m_dict[(0, 0)] * numpy.ones( (1, batch_size, 1, im_size, im_size)) m_x_b = numpy.zeros((1, batch_size)).astype(int) m_y_b = numpy.zeros((1, batch_size)).astype(int) # move background bg_im_f, bg_motion_f, bg_motion_f_r, bg_motion_label_f, bg_motion_label_f_r = \ self.move_background(src_bg, src_motion_f, src_motion_b, src_motion_label_f, src_motion_label_b, m_x_f, m_y_f) bg_im_b, bg_motion_b, bg_motion_b_r, bg_motion_label_b, bg_motion_label_b_r = \ self.move_background(src_bg, src_motion_b, src_motion_f, src_motion_label_b, src_motion_label_f, m_x_b, m_y_b) # merge foreground and background, merge foreground motion and background motion mask = fg_mask_f == 0 fg_motion_label_f[mask] = bg_motion_label_f[mask] fg_motion_label_f_r[mask] = bg_motion_label_f_r[mask] motion_mask = numpy.concatenate((mask, mask), 2) fg_motion_f[motion_mask] = bg_motion_f[motion_mask] fg_motion_f_r[motion_mask] = bg_motion_f_r[motion_mask] im_mask = numpy.concatenate((mask, mask, mask), 2) fg_im_f[im_mask] = bg_im_f[im_mask] mask = fg_mask_b == 0 fg_motion_label_b[mask] = bg_motion_label_b[mask] fg_motion_label_b_r[mask] = bg_motion_label_b_r[mask] motion_mask = numpy.concatenate((mask, mask), 2) fg_motion_b[motion_mask] = bg_motion_b[motion_mask] fg_motion_b_r[motion_mask] = bg_motion_b_r[motion_mask] im_mask = numpy.concatenate((mask, mask, mask), 2) fg_im_b[im_mask] = bg_im_b[im_mask] # merge forward and backward frames and motions im, motion, motion_r, motion_label, motion_label_r, seg_layer = \ 
self.merge_forward_backward(
                fg_im_f, fg_motion_f, fg_motion_f_r, fg_motion_label_f,
                fg_motion_label_f_r, fg_im_b, fg_motion_b, fg_motion_b_r,
                fg_motion_label_b, fg_motion_label_b_r, fg_mask_f, fg_mask_b)
        # swap axes between batch size and frame
        # (arrays are built frame-major; callers expect batch-major)
        im, motion, motion_r, motion_label, motion_label_r, seg_layer = im.swapaxes(
            0, 1), motion.swapaxes(0, 1), motion_r.swapaxes(
                0, 1), motion_label.swapaxes(0, 1), motion_label_r.swapaxes(
                    0, 1), seg_layer.swapaxes(0, 1)
        return im, motion.astype(int), motion_r.astype(
            int), motion_label.astype(int), motion_label_r.astype(
                int), seg_layer.astype(int)

    def generate_motion(self, num_objects, src_mask=None):
        """Sample a random motion label per (object, batch) and expand it to
        dense per-pixel motion fields.

        Returns forward and backward (negated) variants of: the 2-channel
        (x, y) motion field, the 1-channel motion-label map, and the scalar
        per-object displacements (m_x, m_y).  Pixels outside ``src_mask``
        keep zero motion and the (0, 0) label.
        """
        batch_size, im_size = self.batch_size, self.im_size
        m_dict, reverse_m_dict = self.m_dict, self.reverse_m_dict
        # one discrete motion label per object per batch item
        m_label = numpy.random.randint(0, len(m_dict),
                                       size=(num_objects, batch_size))
        m_x_f = numpy.zeros((num_objects, batch_size)).astype(int)
        m_y_f = numpy.zeros((num_objects, batch_size)).astype(int)
        for i in range(num_objects):
            for j in range(batch_size):
                (m_x_f[i, j], m_y_f[i, j]) = reverse_m_dict[m_label[i, j]]
        # backward motion is the exact negation of the forward motion
        m_x_b = numpy.zeros((num_objects, batch_size)).astype(int)
        m_y_b = numpy.zeros((num_objects, batch_size)).astype(int)
        for i in range(num_objects):
            for j in range(batch_size):
                (m_x_b[i, j], m_y_b[i, j]) = (-m_x_f[i, j], -m_y_f[i, j])
        src_motion_f = numpy.zeros(
            (num_objects, batch_size, 2, im_size, im_size))
        src_motion_b = numpy.zeros(
            (num_objects, batch_size, 2, im_size, im_size))
        # label maps default to the index of zero motion, m_dict[(0, 0)]
        src_motion_label_f = m_dict[(0, 0)] * numpy.ones(
            (num_objects, batch_size, 1, im_size, im_size))
        src_motion_label_b = m_dict[(0, 0)] * numpy.ones(
            (num_objects, batch_size, 1, im_size, im_size))
        if src_mask is None:
            # no mask supplied: every pixel belongs to the object
            src_mask = numpy.ones(
                (num_objects, batch_size, 1, im_size, im_size))
        for i in range(num_objects):
            for j in range(batch_size):
                # boolean mask selects the object's pixels for this batch item
                mask = src_mask[i, j, 0, :, :] > 0
                src_motion_f[i, j, 0, mask] = m_x_f[i, j]
                src_motion_f[i, j, 1, mask] = m_y_f[i, j]
                src_motion_b[i, j, 0, mask] = m_x_b[i, j]
                src_motion_b[i, j, 1, mask] = m_y_b[i, j]
                src_motion_label_f[i, j, 0, mask] = m_label[i, j]
                src_motion_label_b[i, j, 0, mask] = m_dict[(m_x_b[i, j],
                                                            m_y_b[i, j])]
        return src_motion_f, src_motion_label_f, m_x_f, m_y_f, \
            src_motion_b, src_motion_label_b, m_x_b, m_y_b

    def move_foreground(self, src_image, src_motion, src_motion_r,
                        src_motion_label, src_motion_label_r, src_mask,
                        m_x, m_y):
        """Animate the foreground objects for ``num_frame_one_direction``
        frames: record the merged scene at each frame, then shift every
        per-object array by (m_x, m_y) for the next frame.
        """
        batch_size, num_frame, im_size = \
            self.batch_size, self.num_frame_one_direction, self.im_size
        im = numpy.zeros((num_frame, batch_size, 3, im_size, im_size))
        motion = numpy.zeros((num_frame, batch_size, 2, im_size, im_size))
        motion_r = numpy.zeros((num_frame, batch_size, 2, im_size, im_size))
        motion_label = numpy.zeros(
            (num_frame, batch_size, 1, im_size, im_size))
        motion_label_r = numpy.zeros(
            (num_frame, batch_size, 1, im_size, im_size))
        mask = numpy.zeros((num_frame, batch_size, 1, im_size, im_size))
        for i in range(num_frame):
            # flatten the per-object layers into one frame (front layer wins)
            im[i, ...], motion[i, ...], motion_r[i, ...], motion_label[i, ...], motion_label_r[i, ...], mask[i, ...] = \
                self.merge_objects(src_image, src_motion, src_motion_r,
                                   src_motion_label, src_motion_label_r,
                                   src_mask)
            # advance every layer by one step of its object motion
            src_image = self.move_image_fg(src_image, m_x, m_y)
            src_motion = self.move_motion(src_motion, m_x, m_y)
            src_motion_r = self.move_motion(src_motion_r, m_x, m_y)
            src_motion_label = self.move_motion_label(src_motion_label,
                                                      m_x, m_y)
            src_motion_label_r = self.move_motion_label(
                src_motion_label_r, m_x, m_y)
            src_mask = self.move_mask(src_mask, m_x, m_y)
        return im, motion, motion_r, motion_label, motion_label_r, mask

    def merge_objects(self, src_image, src_motion, src_motion_r,
                      src_motion_label, src_motion_label_r, src_mask):
        """Composite per-object layers into a single scene.

        Objects are painted in index order, but only onto pixels whose mask
        is still zero — so object 0 occludes object 1, and so on.
        """
        batch_size, num_objects, im_size = \
            self.batch_size, self.num_objects, self.im_size
        im = numpy.zeros((batch_size, 3, im_size, im_size))
        motion = numpy.zeros((batch_size, 2, im_size, im_size))
        motion_r = numpy.zeros((batch_size, 2, im_size, im_size))
        motion_label = numpy.zeros((batch_size, 1, im_size, im_size))
        motion_label_r = numpy.zeros((batch_size, 1, im_size, im_size))
        mask = numpy.zeros((batch_size, 1, im_size, im_size))
        for i in range(num_objects):
            # pixels not yet claimed by a previous (front) object
            zero_mask = mask == 0
            # replicate the 1-channel mask to match 2- and 3-channel arrays
            zero_motion_mask = numpy.concatenate((zero_mask, zero_mask), 1)
            zero_im_mask = numpy.concatenate((zero_mask, zero_mask,
                                              zero_mask), 1)
            im[zero_im_mask] = src_image[i, zero_im_mask]
            motion[zero_motion_mask] = src_motion[i, zero_motion_mask]
            motion_r[zero_motion_mask] = src_motion_r[i, zero_motion_mask]
            motion_label[zero_mask] = src_motion_label[i, zero_mask]
            motion_label_r[zero_mask] = src_motion_label_r[i, zero_mask]
            mask[zero_mask] = src_mask[i, zero_mask]
        return im, motion, motion_r, motion_label, motion_label_r, mask

    def move_image_fg(self, im, m_x, m_y):
        """Shift each object image by its (m_x, m_y) displacement.

        The image is zero-padded by ``m_range`` on every side, then an
        im_size window is cropped at offset (m_range + m_x, m_range + m_y);
        cropping further right/down moves the content left/up in the result.
        NOTE(review): this sign convention appears to match the flipped
        m_kernel built in motion_dict — confirm against the training code.
        """
        batch_size, im_size, m_range = \
            self.batch_size, self.im_size, self.m_range
        num_objects = self.num_objects
        im_big = numpy.zeros((num_objects, batch_size, 3,
                              im_size + m_range * 2, im_size + m_range * 2))
        im_big[:, :, :, m_range:-m_range, m_range:-m_range] = im
        im_new = numpy.zeros((num_objects, batch_size, 3, im_size, im_size))
        for i in range(num_objects):
            for j in range(batch_size):
                x = m_range + m_x[i, j]
                y = m_range + m_y[i, j]
                im_new[i, j, :, :, :] = im_big[i, j, :, y:y + im_size,
                                               x:x + im_size]
        return im_new

    def move_motion(self, motion, m_x, m_y):
        """Shift a 2-channel motion field by (m_x, m_y), same windowing
        scheme as move_image_fg.

        NOTE(review): the border padding value is m_dict[(0, 0)] — the
        *label index* of zero motion, which is nonzero — written into a
        motion-*component* array whose natural "no motion" value is 0.
        Looks inconsistent with generate_motion; verify intended.
        """
        batch_size, im_size, m_range = \
            self.batch_size, self.im_size, self.m_range
        num_objects = self.num_objects
        m_dict = self.m_dict
        motion_big = m_dict[(0, 0)] * numpy.ones(
            (num_objects, batch_size, 2,
             im_size + m_range * 2, im_size + m_range * 2))
        motion_big[:, :, :, m_range:-m_range, m_range:-m_range] = motion
        motion_new = numpy.zeros(
            (num_objects, batch_size, 2, im_size, im_size))
        for i in range(num_objects):
            for j in range(batch_size):
                x = m_range + m_x[i, j]
                y = m_range + m_y[i, j]
                motion_new[i, j, :, :, :] = motion_big[i, j, :,
                                                       y:y + im_size,
                                                       x:x + im_size]
        return motion_new

    def move_motion_label(self, motion_label, m_x, m_y):
        """Shift a 1-channel motion-label map by (m_x, m_y); borders are
        zero-padded (label 0), same windowing scheme as move_image_fg."""
        batch_size, im_size, m_range = \
            self.batch_size, self.im_size, self.m_range
        num_objects = self.num_objects
        motion_label_big = numpy.zeros(
            (num_objects, batch_size, 1,
             im_size + m_range * 2, im_size + m_range * 2))
        motion_label_big[:, :, :, m_range:-m_range,
                         m_range:-m_range] = motion_label
        motion_label_new = numpy.zeros(
            (num_objects, batch_size, 1, im_size, im_size))
        for i in range(num_objects):
            for j in range(batch_size):
                x = m_range + m_x[i, j]
                y = m_range + m_y[i, j]
                motion_label_new[i, j, :, :, :] = \
                    motion_label_big[i, j, :, y:y + im_size, x:x + im_size]
        return motion_label_new

    def move_mask(self, mask, m_x, m_y):
        """Shift a 1-channel object mask by (m_x, m_y); borders become 0
        (background), same windowing scheme as move_image_fg."""
        batch_size, im_size, m_range = \
            self.batch_size, self.im_size, self.m_range
        num_objects = self.num_objects
        mask_big = numpy.zeros((num_objects, batch_size, 1,
                                im_size + m_range * 2, im_size + m_range * 2))
        mask_big[:, :, :, m_range:-m_range, m_range:-m_range] = mask
        mask_new = numpy.zeros((num_objects, batch_size, 1,
                                im_size, im_size))
        for i in range(num_objects):
            for j in range(batch_size):
                x = m_range + m_x[i, j]
                y = m_range + m_y[i, j]
                mask_new[i, j, :, :, :] = mask_big[i, j, :, y:y + im_size,
                                                   x:x + im_size]
        return mask_new

    def move_background(self, src_image, src_motion, src_motion_r,
                        src_motion_label, src_motion_label_r, m_x, m_y):
        """Animate the background: the image is shifted frame to frame,
        while the motion fields and label maps are constant across frames
        (the whole background shares one rigid motion)."""
        batch_size, num_frame, im_size = \
            self.batch_size, self.num_frame_one_direction, self.im_size
        im = numpy.zeros((num_frame, batch_size, 3, im_size, im_size))
        motion = numpy.zeros((num_frame, batch_size, 2, im_size, im_size))
        motion_r = numpy.zeros((num_frame, batch_size, 2, im_size, im_size))
        motion_label = numpy.zeros(
            (num_frame, batch_size, 1, im_size, im_size))
        motion_label_r = numpy.zeros(
            (num_frame, batch_size, 1, im_size, im_size))
        for i in range(num_frame):
            im[i, :, :, :, :] = src_image
            src_image = self.move_image_bg(src_image, m_x, m_y)
            motion[i, :, :, :, :] = src_motion
            # NOTE(review): one index short of the sibling assignments
            # (motion_r is 5-D like motion); relies on broadcasting —
            # confirm this is intentional.
            motion_r[i, :, :, :] = src_motion_r
            motion_label[i, :, :, :, :] = src_motion_label
            motion_label_r[i, :, :, :, :] = src_motion_label_r
        return im, motion, motion_r, motion_label, motion_label_r

    def move_image_bg(self, bg_im, m_x, m_y):
        """Shift the background image by the single (m_x[0], m_y[0])
        motion; unlike the foreground, the revealed border is filled with
        fresh random noise scaled by bg_noise, not zeros."""
        batch_size, im_size, m_range = \
            self.batch_size, self.im_size, self.m_range
        bg_noise = self.bg_noise
        im_big = numpy.random.rand(batch_size, 3, im_size + m_range * 2,
                                   im_size + m_range * 2) * bg_noise
        im_big[:, :, m_range:-m_range, m_range:-m_range] = bg_im
        im_new = numpy.zeros((batch_size, 3, im_size, im_size))
        for i in range(batch_size):
            x = m_range + m_x[0, i]
            y = m_range + m_y[0, i]
            im_new[i, :, :, :] = im_big[i, :, y:y + im_size, x:x + im_size]
        return im_new

    @staticmethod
    def merge_forward_backward(im_f, motion_f, motion_f_r, motion_label_f,
                               motion_label_f_r, im_b, motion_b, motion_b_r,
                               motion_label_b, motion_label_b_r, mask_f,
                               mask_b):
        """Splice a forward-animated and a backward-animated sequence into
        one timeline: the backward frames are reversed and placed first,
        the forward frames follow with their duplicate frame 0 dropped.
        Forward arrays pair with the *reversed-motion* backward arrays
        (motion_f with motion_b_r, etc.) so motion direction is consistent
        along the merged timeline."""
        im_f = im_f[1:, ...]
        im_b = im_b[::-1, ...]
        im = numpy.concatenate((im_b, im_f), 0)
        motion_f = motion_f[1:, ...]
        motion_b_r = motion_b_r[::-1, ...]
        motion = numpy.concatenate((motion_b_r, motion_f), 0)
        motion_f_r = motion_f_r[1:, ...]
        motion_b = motion_b[::-1, ...]
        motion_r = numpy.concatenate((motion_b, motion_f_r), 0)
        motion_label_f = motion_label_f[1:, ...]
        motion_label_b_r = motion_label_b_r[::-1, ...]
        motion_label = numpy.concatenate((motion_label_b_r, motion_label_f),
                                         0)
        motion_label_f_r = motion_label_f_r[1:, ...]
        motion_label_b = motion_label_b[::-1, ...]
        motion_label_r = numpy.concatenate((motion_label_b,
                                            motion_label_f_r), 0)
        mask_f = mask_f[1:, ...]
        mask_b = mask_b[::-1, ...]
        mask = numpy.concatenate((mask_b, mask_f), 0)
        return im, motion, motion_r, motion_label, motion_label_r, mask

    def display(self, im, motion, motion_r, seg_layer):
        """Render a 5-row montage of sample 0 across all frames: images,
        frame differences, forward flow, reverse flow, and segmentation;
        saved to disk or shown interactively per save_display."""
        num_frame = self.num_frame
        width, height = self.visualizer.get_img_size(5, num_frame)
        img = numpy.ones((height, width, 3))
        prev_im = None
        for i in range(num_frame):
            # row 1: the frame itself (CHW -> HWC for display)
            curr_im = im[0, i, :, :, :].squeeze().transpose(1, 2, 0)
            x1, y1, x2, y2 = self.visualizer.get_img_coordinate(1, i + 1)
            img[y1:y2, x1:x2, :] = curr_im
            if i > 0:
                # row 2: absolute difference with the previous frame
                im_diff = abs(curr_im - prev_im)
                x1, y1, x2, y2 = self.visualizer.get_img_coordinate(2, i + 1)
                img[y1:y2, x1:x2, :] = im_diff
            prev_im = curr_im
            # row 3: forward optical flow, color-coded
            flow = motion[0, i, :, :, :].squeeze().transpose(1, 2, 0)
            optical_flow = flowlib.visualize_flow(flow, self.m_range)
            x1, y1, x2, y2 = self.visualizer.get_img_coordinate(3, i + 1)
            img[y1:y2, x1:x2, :] = optical_flow / 255.0
            # row 4: reverse optical flow, color-coded
            flow = motion_r[0, i, :, :, :].squeeze().transpose(1, 2, 0)
            optical_flow = flowlib.visualize_flow(flow, self.m_range)
            x1, y1, x2, y2 = self.visualizer.get_img_coordinate(4, i + 1)
            img[y1:y2, x1:x2, :] = optical_flow / 255.0
            # row 5: segmentation layer, normalized and jet-colormapped
            # NOTE(review): divides by seg_layer.max() — zero if the layer
            # is all background; numpy.float is deprecated in NumPy >= 1.20.
            seg = seg_layer[0, i, :, :, :].squeeze() * 1.0 / seg_layer[
                0, i, :, :, :].max().astype(numpy.float)
            cmap = plt.get_cmap('jet')
            seg_map = cmap(seg)[:, :, 0:3]
            x1, y1, x2, y2 = self.visualizer.get_img_coordinate(5, i + 1)
            img[y1:y2, x1:x2, :] = seg_map
        if self.save_display:
            img = img * 255.0
            img = img.astype(numpy.uint8)
            img = Image.fromarray(img)
            img.save(os.path.join(self.save_display_dir, 'data.png'))
        else:
            plt.figure(1)
            plt.imshow(img)
            plt.axis('off')
            plt.show()
class BaseDemo(object): def __init__(self, args): self.learning_rate = args.learning_rate self.train_epoch = args.train_epoch self.test_epoch = args.test_epoch self.test_interval = args.test_interval self.save_dir = args.save_dir self.display = args.display self.display_all = args.display_all self.best_improve_percent = -1e10 self.batch_size = args.batch_size self.im_size = args.image_size self.im_channel = args.image_channel self.num_frame = args.num_frame self.m_range = args.motion_range if args.data == 'box': self.data = BoxData(args) elif args.data == 'mnist': self.data = MnistData(args) elif args.data == 'box2': self.data = Box2Data(args) elif args.data == 'mnist2': self.data = Mnist2Data(args) elif args.data == 'robot64': self.data = Robot64Data(args) elif args.data == 'mpii64': self.data = Mpii64Data(args) elif args.data == 'mpii64_sample': self.data = Mpii64Sample(args) elif args.data == 'nyuv2': self.data = Nyuv2Data(args) else: logging.error('%s data not supported' % args.data) self.visualizer = BaseVisualizer(args, self.data.reverse_m_dict) def compare(self): width, height = self.visualizer.get_img_size(4, 3) img = numpy.ones((height, width, 3)) base1_loss, base2_loss, interp_loss, extrap_loss = [], [], [], [] for epoch in range(self.test_epoch): if self.data.name in ['box', 'mnist']: im, motion, _, _ = self.data.get_next_batch( self.data.test_images) elif self.data.name in ['box2', 'mnist2']: im, motion, _ = self.data.get_next_batch(self.data.test_images) elif self.data.name in [ 'robot64', 'mpii64', 'nyuv2', 'mpii64_sample' ]: im, motion = self.data.get_next_batch( self.data.test_meta), None elif self.data.name in ['mpii64_sample']: im, motion = self.data.get_next_batch( self.data.test_meta), None im = im[:, -self.num_frame:, :, :, :] else: logging.error('%s data not supported' % self.data.name) sys.exit() im = Variable(torch.from_numpy(im).float()) if torch.cuda.is_available(): im = im.cuda() im1 = im[:, 0, :, :, :] im2 = im[:, 1, :, :, :] im3 = im[:, 
2, :, :, :] im = im1[0].cpu().data.numpy().transpose(1, 2, 0) x1, y1, x2, y2 = self.visualizer.get_img_coordinate(1, 1) img[y1:y2, x1:x2, :] = im im = im2[0].cpu().data.numpy().transpose(1, 2, 0) x1, y1, x2, y2 = self.visualizer.get_img_coordinate(1, 2) img[y1:y2, x1:x2, :] = im im = im3[0].cpu().data.numpy().transpose(1, 2, 0) x1, y1, x2, y2 = self.visualizer.get_img_coordinate(1, 3) img[y1:y2, x1:x2, :] = im im_diff = torch.abs(im1[0] - im2[0]).cpu().data.numpy().transpose( 1, 2, 0) x1, y1, x2, y2 = self.visualizer.get_img_coordinate(2, 1) img[y1:y2, x1:x2, :] = im_diff im_diff = torch.abs(im2[0] - im3[0]).cpu().data.numpy().transpose( 1, 2, 0) x1, y1, x2, y2 = self.visualizer.get_img_coordinate(2, 2) img[y1:y2, x1:x2, :] = im_diff base1_loss.append(torch.abs(im1 - im2).sum().data[0]) im_base = (im1 + im3) / 2 interp_loss.append(torch.abs(im_base - im2).sum().data[0]) im = im_base[0].cpu().data.numpy().transpose(1, 2, 0) x1, y1, x2, y2 = self.visualizer.get_img_coordinate(3, 1) img[y1:y2, x1:x2, :] = im im_diff = torch.abs(im_base[0] - im2[0]).cpu().data.numpy().transpose(1, 2, 0) x1, y1, x2, y2 = self.visualizer.get_img_coordinate(3, 2) img[y1:y2, x1:x2, :] = im_diff base2_loss.append(torch.abs(im2 - im3).sum().data[0]) im_base = 2 * im2 - im1 im_base[im_base < 0] = 0 im_base[im_base > 1] = 1 extrap_loss.append(torch.abs(im_base - im3).sum().data[0]) im = im_base[0].cpu().data.numpy().transpose(1, 2, 0) x1, y1, x2, y2 = self.visualizer.get_img_coordinate(4, 1) img[y1:y2, x1:x2, :] = im im_diff = torch.abs(im_base[0] - im3[0]).cpu().data.numpy().transpose(1, 2, 0) x1, y1, x2, y2 = self.visualizer.get_img_coordinate(4, 2) img[y1:y2, x1:x2, :] = im_diff plt.figure(1) plt.imshow(img) plt.axis('off') plt.show() base1_loss = numpy.mean(numpy.asarray(base1_loss)) interp_loss = numpy.mean(numpy.asarray(interp_loss)) base2_loss = numpy.mean(numpy.asarray(base2_loss)) extrap_loss = numpy.mean(numpy.asarray(extrap_loss)) print base1_loss, interp_loss print base2_loss, 
extrap_loss