def evaluate(model, dataset, hand_param, debug):
    transformed_dataset = TransformDataset(dataset, model.encode)
    avg_distances = []
    max_distances = []
    length = len(transformed_dataset) if not debug else 10
    for idx in tqdm.tqdm(range(length)):
        image, gt_2dj, gt_3dj = transformed_dataset.get_example(idx)
        example = dataset.get_example(idx)
        pred_j = model.predict(np.array([image], dtype=np.float32))
        with chainer.using_config('train', False):
            loss = model.forward(
                np.expand_dims(image, axis=0),
                np.expand_dims(gt_3dj, axis=0),
                np.expand_dims(gt_2dj, axis=0),
            )
        pred_j = pred_j.array.reshape(hand_param["n_joints"], -1)
        dim = pred_j.shape[-1]
        if dim == 5:
            # the model predicts 3D and 2D joint coordinates jointly
            pred_3d = pred_j[:, :3]
            pred_2d = pred_j[:, 3:]
        else:
            pred_3d = pred_j
        logger.debug("> {}".format(pred_j))
        logger.debug("> loss {}".format(loss))
        logger.debug("> visualize pred_joint")
        # predictions are normalized to the crop cube; scale back to mm
        z_half = hand_param["cube"][0] / 2
        pred_3d = z_half * pred_3d
        gt_3dj = example["rgb_joint"] if hand_param["use_rgb"] else example["depth_joint"]
        gt_3dj = gt_3dj - calc_com(gt_3dj)
        dist = np.sqrt(np.sum(np.square(pred_3d - gt_3dj), axis=1))
        avg_dist = np.mean(dist)
        max_dist = np.max(dist)
        avg_distances.append(avg_dist)
        max_distances.append(max_dist)
    print(np.array(avg_distances).mean())
    max_distances = np.array(max_distances)
    # PCK-style curve: fraction of frames whose worst joint error stays
    # below each distance threshold
    max_threshold = 80
    thresholds = range(3, max_threshold)
    ps = []
    for threshold in thresholds:
        oks = np.sum(max_distances <= threshold)
        percent = 100 * (oks / len(max_distances))
        ps.append(percent)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set_xlabel("Distance threshold / mm")
    ax.set_ylabel("Fraction of frames with max error below distance / %")
    ax.set_ylim(0, 100)
    ax.set_xlim(0, max_threshold)
    ax.plot(thresholds, ps)
    ax.grid(True, linestyle="--")
    plt.savefig("plot.png")
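
# --- illustrative sketch (added; synthetic data, not real results) --------
# The curve computed in evaluate() is a PCK-style metric: for each threshold,
# the fraction of frames whose worst joint error is below it. Standalone demo:
#
#   import numpy as np
#   worst_per_frame = np.abs(np.random.randn(100)) * 30  # fake max errors, mm
#   thresholds = np.arange(3, 80)
#   curve = [100 * np.mean(worst_per_frame <= t) for t in thresholds]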
def visualize_dataset(config):
    from matplotlib import pyplot as plt
    dataset = select_dataset(config, return_data=["train_set"])
    hand_class = config.get('model_param', 'hand_class').split(",")
    hand_class = [k.strip() for k in hand_class]
    class_converter, flip_converter = create_converter(hand_class)
    logger.info("hand_class = {}".format(hand_class))
    logger.info("done get dataset")
    idx = random.randint(0, len(dataset) - 1)
    logger.info("get example")
    rgb, rgb_bbox, rgb_class = dataset.get_example(idx)
    logger.info("Done get example")
    fig = plt.figure(figsize=(5, 10))
    ax1 = fig.add_subplot(211)
    ax2 = fig.add_subplot(212)
    label = rgb_class
    class_converter = {v: k for k, v in class_converter.items()}
    color = [COLOR_MAP[class_converter[c]] for c in label]
    print(label)
    vis_bbox(
        rgb,
        rgb_bbox,
        instance_colors=color,
        label=label,
        label_names=hand_class,
        ax=ax1,
    )
    model = create_ssd_model()
    transform_dataset = TransformDataset(
        dataset,
        Transform(model.coder, model.insize, model.mean, train=True))
    img, mb_loc, mb_label = transform_dataset.get_example(idx)
    mb_color = [COLOR_MAP[class_converter[c]] for c in mb_label]
    vis_bbox(
        img,
        mb_loc,
        instance_colors=mb_color,
        label=mb_label,
        label_names=hand_class,
        ax=ax2,
    )
    plt.savefig("vis.png")
    plt.show()
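
# --- usage sketch (added assumption, not in the original source) ----------
# visualize_dataset() reads `hand_class` via config.get('model_param', ...),
# which matches configparser's API; "config.ini" is a placeholder and must
# also contain whatever select_dataset() expects.
#
#   import configparser
#   config = configparser.ConfigParser()
#   config.read("config.ini")
#   visualize_dataset(config)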
# # visualize transformed dataset

# +
from collections import defaultdict

from chainer.datasets import TransformDataset

from pose.models.selector import select_model
from pose.hand_dataset import common_dataset

# `hand_param` and `dataset` are assumed to be defined in earlier cells
config = defaultdict(dict)
config["model"]["name"] = "ganerated"
hand_param["inH"] = 224
hand_param["inW"] = 224
hand_param["inC"] = 3
hand_param["n_joints"] = common_dataset.NUM_KEYPOINTS
hand_param["edges"] = common_dataset.EDGES

model = select_model(config, hand_param)
transform_dataset = TransformDataset(dataset, model.encode)

# +
print(current_idx)
rgb, hm, intermediate3d, rgb_joint = transform_dataset.get_example(current_idx)

from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure()
ax = fig.add_subplot(121)
# collapse the per-joint heatmaps into one 2D image for display
ax.imshow(np.max(hm, axis=0))
ax2 = fig.add_subplot(122, projection="3d")
ax2.scatter(*rgb_joint[:, ::-1].transpose())
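
# +
# (added sanity check, not in the original notebook) confirm the shapes the
# cell above relies on: `hm` should be one heatmap per joint, `rgb_joint`
# one 3D coordinate per joint.
print("hm", hm.shape, "rgb_joint", rgb_joint.shape)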
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset', help="path to train json file")
    parser.add_argument('test_dataset', help="path to test dataset json file")
    parser.add_argument(
        '--dataset-root',
        help="path to dataset root if dataset file is not already in root folder of dataset")
    parser.add_argument('--model', choices=('ssd300', 'ssd512'), default='ssd512')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--gpu', type=int, nargs='*', default=[])
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    parser.add_argument('--lr', type=float, default=0.001, help="default learning rate")
    parser.add_argument('--port', type=int, default=1337, help="port for bbox sending")
    parser.add_argument('--ip', default='127.0.0.1', help="destination ip for bbox sending")
    parser.add_argument(
        '--test-image',
        help="path to test image that shall be displayed in bbox vis")
    args = parser.parse_args()

    if args.dataset_root is None:
        args.dataset_root = os.path.dirname(args.dataset)

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=1, pretrained_model='imagenet')
        image_size = (300, 300)
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=1, pretrained_model='imagenet')
        image_size = (512, 512)
    else:
        raise NotImplementedError("The model you want to train does not exist")

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)

    train = TransformDataset(
        SheepDataset(args.dataset_root, args.dataset, image_size=image_size),
        Transform(model.coder, model.insize, model.mean))

    if len(args.gpu) > 1:
        gpu_datasets = split_dataset_n_random(train, len(args.gpu))
        # all splits must have the same length, otherwise
        # MultiprocessParallelUpdater rejects them
        if not len(gpu_datasets[0]) == len(gpu_datasets[-1]):
            adapted_second_split = split_dataset(gpu_datasets[-1], len(gpu_datasets[0]))[0]
            gpu_datasets[-1] = adapted_second_split
    else:
        gpu_datasets = [train]

    train_iter = [
        ThreadIterator(gpu_dataset, args.batchsize)
        for gpu_dataset in gpu_datasets
    ]

    test = SheepDataset(args.dataset_root, args.test_dataset, image_size=image_size)
    test_iter = chainer.iterators.MultithreadIterator(
        test, args.batchsize, repeat=False, shuffle=False, n_threads=2)

    # Adam optimizer; the initial learning rate comes from --lr
    optimizer = chainer.optimizers.Adam(alpha=args.lr)
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    if len(args.gpu) <= 1:
        updater = training.updaters.StandardUpdater(
            train_iter[0],
            optimizer,
            device=args.gpu[0] if len(args.gpu) > 0 else -1,
        )
    else:
        updater = training.updaters.MultiprocessParallelUpdater(
            train_iter, optimizer, devices=args.gpu)
        updater.setup_workers()

    if len(args.gpu) > 0 and args.gpu[0] >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu[0]).use()
        model.to_gpu()

    trainer = training.Trainer(updater, (200, 'epoch'), args.out)
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter,
            model,
            use_07_metric=True,
            label_names=voc_bbox_label_names),
        trigger=(1000, 'iteration'))

    # build logger
    # make sure to log all data necessary for prediction
    log_interval = 100, 'iteration'
    data_to_log = {
        'image_size': image_size,
        'model_type': args.model,
    }

    # add all command line arguments
    for argument in filter(lambda x: not x.startswith('_'), dir(args)):
        data_to_log[argument] = getattr(args, argument)

    # create callback that logs all auxiliary data the first time things get logged
    def backup_train_config(stats_cpu):
        # compare against the interval's iteration count, not the tuple itself
        if stats_cpu['iteration'] == log_interval[0]:
            stats_cpu.update(data_to_log)

    trainer.extend(
        extensions.LogReport(trigger=log_interval, postprocess=backup_train_config))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
            'main/loss/conf', 'validation/main/map'
        ]),
        trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(
        extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
        trigger=(5000, 'iteration'))

    if args.test_image is not None:
        plot_image = train._dataset.load_image(args.test_image, resize_to=image_size)
    else:
        plot_image, _, _ = train.get_example(0)
        plot_image += train._transform.mean

    bbox_plotter = BBOXPlotter(
        plot_image,
        os.path.join(args.out, 'bboxes'),
        send_bboxes=True,
        upstream_port=args.port,
        upstream_ip=args.ip,
    )
    trainer.extend(bbox_plotter, trigger=(10, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
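
# Example invocation (added note; "train.py" and the paths are placeholders,
# the flags are exactly those defined in main()):
#
#   python train.py train.json test.json --model ssd512 --gpu 0 \
#       --batchsize 32 --out result --lr 0.001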
def predict_sample(model, dataset, hand_param):
    transformed_dataset = TransformDataset(dataset, model.encode)
    idx = np.random.randint(0, len(transformed_dataset))
    image, gt_2dj, gt_3dj = transformed_dataset.get_example(idx)
    example = dataset.get_example(idx)
    vis_vu = gt_2dj * np.array([[hand_param["inH"], hand_param["inW"]]])
    pred_j = model.predict(np.array([image], dtype=np.float32))
    with chainer.using_config('train', False):
        loss = model.forward(
            np.expand_dims(image, axis=0),
            np.expand_dims(gt_3dj, axis=0),
            np.expand_dims(gt_2dj, axis=0),
        )
    pred_j = pred_j.array.reshape(hand_param["n_joints"], -1)
    dim = pred_j.shape[-1]
    if dim == 5:
        # the model predicts 3D and 2D joint coordinates jointly
        pred_3d = pred_j[:, :3]
        pred_2d = pred_j[:, 3:]
        pred_2d = pred_2d * np.array([[hand_param["inH"], hand_param["inW"]]])
    else:
        pred_3d = pred_j
    logger.info("> {}".format(pred_j))
    logger.info("> loss {}".format(loss))
    logger.info("> visualize pred_joint")
    plot_direction = "horizontal"
    if plot_direction == "horizontal":
        space = (1, 2)
        figsize = (10, 5)
    else:
        space = (2, 1)
        figsize = (5, 10)
    # predictions are normalized to the crop cube; scale back to mm
    z_half = hand_param["cube"][0] / 2
    pred_3d = z_half * pred_3d
    gt_3dj = example["rgb_joint"] if hand_param["use_rgb"] else example["depth_joint"]
    gt_3dj = gt_3dj - calc_com(gt_3dj)
    distance = np.sqrt(np.sum(np.square(pred_3d - gt_3dj), axis=1)).mean()
    logger.info("> mean distance {:0.2f}".format(distance))
    fig = plt.figure(figsize=figsize)
    fig.suptitle("mean distance = {:0.2f}".format(distance))
    ax1 = fig.add_subplot(*space, 1)
    ax1.set_title("result 2D")
    ax2 = fig.add_subplot(*space, 2, projection="3d")
    ax2.set_title("result 3D")
    color_map = hand_param["color_map"]
    keypoint_names = hand_param["keypoint_names"]
    edges = hand_param["edges"]
    color = [color_map[k] for k in keypoint_names]
    pred_color = [color_map[s, t] for s, t in edges]
    gt2_color = [[255, 255, 255] for k in keypoint_names]
    gt3_color = [[50, 50, 50] for k in keypoint_names]
    if hand_param["use_rgb"]:
        image = denormalize_rgb(image)
        chainercv.visualizations.vis_image(image, ax=ax1)
    else:
        image = image.squeeze()
        ax1.imshow(image, cmap="gray")
    vis_pose(vis_vu, edges, point_color=color, edge_color=gt2_color, ax=ax1)
    if dim == 5:
        # pred_2d only exists when the model also predicts 2D joints
        vis_pose(pred_2d, edges, point_color=color, edge_color=pred_color, ax=ax1)
    vis_pose(gt_3dj, edges, point_color=color, edge_color=gt3_color, ax=ax2)
    if dim != 2:
        vis_pose(pred_3d, edges, point_color=color, edge_color=pred_color, ax=ax2)
    # set layout
    for ax in [ax2]:
        ax.set_xlabel("x")
        ax.set_ylabel("y")
        ax.set_zlabel("z")
        ax.view_init(-65, -90)
    # show
    plt.show()
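
# --- usage sketch (added assumption, not in the original source) ----------
# Wires predict_sample() up with the repo's own helpers used elsewhere in
# this section; the "test_set" return key is a guess.
#
#   def run_prediction(config, hand_param):
#       dataset = select_dataset(config, return_data=["test_set"])
#       model = select_model(config, hand_param)
#       predict_sample(model, dataset, hand_param)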