def test_tile_images(self):
    """Check that tile (row 1, col 1) of the tiled output equals its source image.

    A random batch of ``B`` images (10 <= B < 20) of shape ``(3, H, W)`` is
    tiled into ``n_col`` columns (2 <= n_col < 5) and the region where the
    image at grid position (1, 1) is expected is compared with the input.
    """
    B = numpy.random.randint(10, 20)
    n_col = numpy.random.randint(2, 5)
    H = 30
    W = 40
    fill = 128
    pad = 0

    # Pass both bounds explicitly: ``uniform(255, ...)`` would set low=255
    # and leave high at its default of 1.0, and NumPy documents the result
    # of high < low as undefined.
    imgs = numpy.random.uniform(0, 255, size=(B, 3, H, W))
    tile = tile_images(imgs, n_col, pad, fill=fill)

    # ``pad`` may be a single int or a (pad_y, pad_x) pair.
    if isinstance(pad, int):
        pad_y = pad
        pad_x = pad
    else:
        pad_y, pad_x = pad

    n_row = int(math.ceil(B / n_col))
    self.assertTrue(n_col >= 1 and n_row >= 1)

    # Top-left corner at which this test expects tile (1, 1) to start.
    start_y_11 = H + pad_y + pad_y // 2
    start_x_11 = W + pad_x + pad_x // 2
    tile_11 = tile[:,
                   start_y_11:start_y_11 + H,
                   start_x_11:start_x_11 + W]

    # Row 1, column 1 in row-major order is image index 1 * n_col + 1.
    numpy.testing.assert_equal(tile_11, imgs[n_col + 1])
def test_tile_images(self):
    """Check that tile (row 1, col 1) of the tiled output equals its source image.

    Uses ``self.pad`` (treated as a single int here) and ``self.fill`` as the
    padding and fill arguments of ``tile_images``. A random batch of ``B``
    images (10 <= B < 20) of shape ``(3, H, W)`` is tiled into ``n_col``
    columns (2 <= n_col < 5).
    """
    B = np.random.randint(10, 20)
    n_col = np.random.randint(2, 5)
    H = 30
    W = 40

    # Pass both bounds explicitly: ``uniform(255, ...)`` would set low=255
    # and leave high at its default of 1.0, and NumPy documents the result
    # of high < low as undefined.
    imgs = np.random.uniform(0, 255, size=(B, 3, H, W))
    tile = tile_images(imgs, n_col, self.pad, fill=self.fill)

    n_row = int(math.ceil(B / n_col))
    self.assertTrue(n_col >= 1 and n_row >= 1)

    # Top-left corner at which this test expects tile (1, 1) to start.
    start_y_11 = H + self.pad + self.pad // 2
    start_x_11 = W + self.pad + self.pad // 2
    tile_11 = tile[:,
                   start_y_11:start_y_11 + H,
                   start_x_11:start_x_11 + W]

    # Row 1, column 1 in row-major order is image index 1 * n_col + 1.
    np.testing.assert_equal(tile_11, imgs[n_col + 1])
def main():
    """Sample four images from a pre-trained generator and save them tiled.

    Loads generator weights from ``--model_path``, feeds four random
    512-dimensional latent vectors through the network, rescales the output
    from [-1, 1] to [0, 255] and writes a two-column tiling to ``out.png``.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--model_path',
        default='./progressive_growing_of_gans/Gs_chainer.npz')
    opts = cli.parse_args()

    # Inference only: switch Chainer's global config out of training mode.
    chainer.config.train = False

    z = np.random.randn(4, 512).astype(np.float32)

    net = Generator()
    chainer.serializers.load_npz(opts.model_path, net)

    with chainer.no_backprop_mode():
        out = net(z)
    print(out.shape)

    # Map generator output from [-1, 1] to [0, 255], then clamp.
    pixels = 127.5 + 127.5 * cuda.to_cpu(out.array)
    pixels = np.clip(pixels, 0.0, 255.0).astype(np.float32)

    tiled = utils.tile_images(pixels, 2)
    utils.write_image(tiled, 'out.png')
def main():
    """Detect objects in an image, estimate a pose per detection, and save results.

    Command-line options:
        --gpu       GPU id; a negative value (default -1) runs on the CPU.
        --model     Path to an ``.npz`` model file.
        --snapshot  Path to a trainer snapshot, used when ``--model`` is empty.
        --image     Path to the input image (required).

    When neither ``--model`` nor ``--snapshot`` is given, a pre-trained model
    is loaded from ``./models/model_2018_05_22.npz`` and downloaded first if
    that file does not exist.

    Writes three files to the current directory: ``tiled.jpg`` (the resized
    crops), ``input.jpg`` (the image as read by OpenCV) and ``output.jpg``
    (the image with poses and boxes drawn).

    Raises:
        ValueError: if ``--image`` is not given.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument(
        '--model', default='',
        help='if not specified, you download and use a pre-trained model.')
    parser.add_argument('--snapshot', default='')
    parser.add_argument('--image', type=str)
    args = parser.parse_args()

    # Fail fast, before any model is built or downloaded. Previously the
    # exception object was created but never raised.
    if not args.image:
        raise ValueError('args.image should be specified.')
    args.image = os.path.expanduser(args.image)

    detector = SSD512(pretrained_model='voc0712')
    model = StackedHG(16)

    if args.model:
        chainer.serializers.load_npz(args.model, model)
    elif args.snapshot:
        chainer.serializers.load_npz(snap2model_trainer(args.snapshot), model)
    else:
        # pre-trained model
        model_path = './models/model_2018_05_22.npz'
        if not os.path.exists(model_path):
            gdd.download_file_from_google_drive(
                file_id='1rZZJRpqQKkncn30Igtk8KirgR96QlCFO',
                dest_path=model_path)
        chainer.serializers.load_npz(model_path, model)

    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()
        detector.to_gpu()
        model.to_gpu()

    chainer.config.train = False

    img = utils.read_image(args.image)

    # detect persons
    # NOTE(review): every returned box is used; the labels and scores are
    # not consulted, so non-person detections are not filtered out here.
    # Confirm whether that is intended.
    bboxes, _labels, _scores = detector.predict([img])
    bbox = bboxes[0]

    # expand bboxes and crop the image
    img = img / 255.
    img = img.astype(np.float32)

    img_persons = list()
    bbox_persons = list()
    for ymin, xmin, ymax, xmax in bbox:
        scale = ymax - ymin

        # this is for ankle (also used in training with mpii dataset)
        offset = 15 / 200 * scale
        center = (xmin + xmax) / 2, (ymin + ymax) / 2 + offset

        # enlarge the square crop by 25% around the shifted center
        scale *= 1.25

        xmin, xmax = center[0] - scale / 2, center[0] + scale / 2
        ymin, ymax = center[1] - scale / 2, center[1] + scale / 2

        # truncate to the image bounds (img is indexed as channel, y, x)
        xmin = int(max(0, xmin))
        ymin = int(max(0, ymin))
        xmax = int(min(img.shape[2], xmax))
        ymax = int(min(img.shape[1], ymax))

        # cropping
        img_person = img[:, ymin:ymax, xmin:xmax]
        img_person = transforms.resize(img_person, (256, 256))

        img_persons.append(img_person)
        bbox_persons.append((ymin, xmin, ymax, xmax))

    # NOTE(review): if nothing was detected both arrays are empty; the steps
    # below do not special-case that -- TODO confirm the desired behavior.
    img_persons = np.array(img_persons)
    bbox_persons = np.array(bbox_persons)

    utils.write_image(
        utils.tile_images((255 * img_persons).astype(np.float32), n_col=2),
        'tiled.jpg')

    # estimate poses
    if args.gpu >= 0:
        img_persons = cuda.to_gpu(img_persons)

    with chainer.no_backprop_mode():
        # (R, 3, 256, 256) -> (R, 16, 64, 64) -> (16, 64, 64)
        # The model returns two values; only the second is used.
        _outputs, outputs = model(img_persons)
    outputs = cuda.to_cpu(outputs.array)

    _, C, H, W = outputs.shape

    # heatmap to keypoint
    # R, C, H, W -> R, C, 2
    keypoints = list()
    for output in outputs:
        # (C, H, W) -> (C, ): flat index of each channel's maximum
        output = output.reshape(C, -1).argmax(axis=1)
        # flat index -> (y, x) per channel, stacked as an array of shape (C, 2)
        keypoint = np.unravel_index(output, (H, W))
        keypoint = np.array(keypoint).T
        keypoints.append(keypoint)

    # keypoint (local) to keypoint (global)
    keypoint_persons = list()
    for keypoint, bbox_person in zip(keypoints, bbox_persons):
        ymin, xmin, ymax, xmax = bbox_person
        # rescale from heatmap size to the crop size, then shift by the
        # crop's top-left corner
        keypoint = transforms.resize_point(
            keypoint, (H, W), (ymax - ymin, xmax - xmin))
        keypoint_person = keypoint + np.array((ymin, xmin))
        keypoint_persons.append(keypoint_person)

    # visualize
    img = cv2.imread(args.image)
    visualizer = MPIIVisualizer()

    img_pose = img.copy()
    for keypoint_person, bbox_person in zip(keypoint_persons, bbox_persons):
        ymin, xmin, ymax, xmax = bbox_person

        img_pose = visualizer.run(img_pose, keypoint_person)
        img_pose = cv2.rectangle(
            img_pose, (xmin, ymin), (xmax, ymax), (0, 255, 255), 10)

    cv2.imwrite('input.jpg', img)
    cv2.imwrite('output.jpg', img_pose)