Example #1
def main(args):
    os.environ['CUDA_VISIBLE_DEVICES'] = '3'
    # create checkpoint dir
    if not isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # create model
    model = network.__dict__[cfg.model](cfg.output_shape, cfg.num_class, pretrained=False)
    model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion1 = torch.nn.MSELoss().cuda() # for Global loss
    criterion2 = torch.nn.MSELoss(reduction='none').cuda() # for refine loss ('reduce=False' is the deprecated spelling)
    optimizer = torch.optim.Adam(model.parameters(),
                                lr = cfg.lr,
                                weight_decay=cfg.weight_decay)
    
    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            pretrained_dict = checkpoint['state_dict']
            model.load_state_dict(pretrained_dict)
            args.start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
            logger = Logger(join(args.checkpoint, 'log.txt'), resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
            # fall back to a fresh log so `logger` is defined for the loop below
            logger = Logger(join(args.checkpoint, 'log.txt'))
            logger.set_names(['Epoch', 'LR', 'Train Loss'])
    else:
        logger = Logger(join(args.checkpoint, 'log.txt'))
        logger.set_names(['Epoch', 'LR', 'Train Loss'])

    cudnn.benchmark = True
    # parameter count * 4 bytes (float32), reported in MB
    print('    Total params: %.2fMB' % (sum(p.numel() for p in model.parameters())/(1024*1024)*4))

    train_loader = torch.utils.data.DataLoader(
        MscocoMulti(cfg),
        batch_size=cfg.batch_size*args.num_gpus, shuffle=True,
        num_workers=args.workers, pin_memory=True) 

    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, cfg.lr_dec_epoch, cfg.lr_gamma)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr)) 

        # train for one epoch
        train_loss = train(train_loader, model, [criterion1, criterion2], optimizer)
        print('train_loss: ',train_loss)

        # append logger file
        logger.append([epoch + 1, lr, train_loss])

        save_model({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer' : optimizer.state_dict(),
        }, checkpoint=args.checkpoint)

    logger.close()
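The adjust_learning_rate helper called above is not shown in this example; below is a minimal sketch of a step-decay version consistent with its call site (the base_lr default is a placeholder for cfg.lr, and the real logic may differ).

def adjust_learning_rate(optimizer, epoch, dec_epochs, gamma, base_lr=5e-4):
    # decay the LR by gamma once for every milestone in dec_epochs
    # that the current epoch has already passed
    lr = base_lr * (gamma ** sum(1 for e in dec_epochs if epoch >= e))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr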
Example #2
def main(args):
    # model = load_flattened_model_val(args.checkpoint, args.test)
    model = load_model_val(args.checkpoint, args.test)

    test_loader = torch.utils.data.DataLoader(MscocoMulti(cfg, train=False),
                                              batch_size=args.batch *
                                              args.num_gpus,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True)

    print('testing...')
    full_result = []
    for i, (inputs, meta) in tqdm(enumerate(test_loader)):
        # full_result += Predict.predict_val(model, inputs, meta)
        full_result += PredictWithRotation.predict_val(model, inputs, meta, 10)
        if i == 100:  # early cut-off, presumably to keep the test run short
            break

    result_path = args.result
    if not isdir(result_path):
        mkdir_p(result_path)
    result_file = os.path.join(result_path, 'result.json')
    with open(result_file, 'w') as wf:
        json.dump(full_result, wf)

    # evaluate on COCO
    eval_gt = COCO(cfg.ori_gt_path)
    eval_dt = eval_gt.loadRes(result_file)
    cocoEval = COCOeval(eval_gt, eval_dt, iouType='keypoints')
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
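For eval_gt.loadRes to accept result.json, each element that predict_val appends to full_result must be a keypoint result dict in the standard COCO format; the sketch below shows one entry with illustrative values (an assumption about predict_val's output, not its actual code).

example_entry = {
    "image_id": 42,                          # COCO image id
    "category_id": 1,                        # 1 == person for keypoint results
    "keypoints": [320.5, 240.0, 1.0] * 17,   # 17 flattened (x, y, visibility) triples
    "score": 0.9,                            # detection confidence
}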
Example #3
def _save_preds(dataset, data_output_dir):
    """
    Save the PyTorch Dataset of predictions to a file

    :param dataset: The PyTorch Dataset object of predictions
    :param data_output_dir: The filename for the file to save
    """
    # Make the directory if it doesn't exist
    if not isdir(data_output_dir):
        mkdir_p(data_output_dir)

    # Just save the predictions in the correct place via PyTorch
    torch.save(dataset, os.path.join(data_output_dir, "3dposes"))
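Reading the dataset back is symmetric to the save; a minimal sketch, assuming the same data_output_dir is passed in.

import os
import torch

def _load_preds(data_output_dir):
    # inverse of _save_preds: unpickle the saved Dataset of predictions
    return torch.load(os.path.join(data_output_dir, "3dposes"))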
Example #4
def visualize_2d_overlay_3d_gt_3d_pred(options):
    """
    Same as visualize_2d_and_3d, but also adds a ground-truth visualization.

    Images in the output from left to right are:
    1. original image with 2D pose overlaid
    2. 3D ground truth
    3. 3D prediction visualization

    Options that should be included:
    options.img_dir: the directory for the image
    options.twod_pose_estimations: a PyTorch file containing 2D pose estimations. Assumes the format of a dict,
        keyed by filenames
    options.threed_pose_ground_truths: a PyTorch file containing 3D pose ground truths
    options.threed_pose_estimations: a PyTorch file containing the 3D pose estimations. Assumes the format of a dict,
        keyed by filenames
    options.output_dir: a directory to output each visualization to

    :param options: Options for the visualizations, defined in options.py. (Including defaults).
    """
    # Load the predictions and unpack options
    img_dir = options.img_dir
    twod_pose_preds = torch.load(options.twod_pose_estimations)
    threed_pose_ground_truths = torch.load(options.threed_pose_ground_truths)
    threed_pose_preds = torch.load(options.threed_pose_estimations)
    output_dir = options.output_dir

    # Make dir for output if it doesn't exist
    if not isdir(output_dir):
        mkdir_p(output_dir)

    i = 0
    total = len(twod_pose_preds)

    # Produce a visualization for each input image, outputting to 'output_dir' with the same image name as input
    for filename in os.listdir(img_dir):
        if filename.endswith(".jpg") or filename.endswith(".png"):
            abs_filename = os.path.join(img_dir, filename)
            img = scipy.misc.imread(abs_filename)
            if filename not in twod_pose_preds:  # the dict is keyed by filename, not the absolute path
                continue
            twod_overlay = viz_2d_overlay(img, twod_pose_preds[filename])
            threed_gt_viz = viz_3d_pose(threed_pose_ground_truths[filename].numpy())
            threed_pose_viz = viz_3d_pose(threed_pose_preds[filename].numpy())
            final_img = _pack_images([twod_overlay, threed_gt_viz, threed_pose_viz])
            scipy.misc.imsave(os.path.join(output_dir, filename), final_img)

            # progress
            if i % 100 == 0:
                print("Visualized " + str(i) + " out of " + str(total))
            i += 1
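The _pack_images helper used here (and in later examples) is not shown; below is a plausible sketch that concatenates same-channel images into one row, padding heights to match. Its exact behaviour is an assumption.

import numpy as np

def _pack_images(imgs):
    # pad every image to the tallest height, then concatenate along the width axis
    h = max(im.shape[0] for im in imgs)
    padded = [np.pad(im, ((0, h - im.shape[0]), (0, 0), (0, 0)), mode='constant')
              for im in imgs]
    return np.concatenate(padded, axis=1)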
Example #5
def _save_preds(pred_2d, pred_3d, gt_2d, gt_3d, metas, data_output_dir):
    """
    TODO
    """
    # Make the directory if it doesn't exist
    if not isdir(data_output_dir):
        mkdir_p(data_output_dir)

    # Just save the predictions in the correct place via PyTorch
    torch.save(pred_2d, os.path.join(data_output_dir, "2dpreds"))
    torch.save(pred_3d, os.path.join(data_output_dir, "3dpreds"))
    torch.save(gt_2d, os.path.join(data_output_dir, "2dgt"))
    torch.save(gt_3d, os.path.join(data_output_dir, "3dgt"))
    torch.save(metas, os.path.join(data_output_dir, "metas"))
Example #6
def graph_PCKh_scores(options):
    """
    Script that takes a list of prediction files, plots their PCKh curves, and saves each figure to a file

    Required options:
    options.prediction_files - a space separated list of prediction files (output as part of the model checkpointing)
    options.model_names - a space separated list of model names, used in the figure
    options.output_dir - a directory in which to save each graph as an image

    :param options: Options for the evaluation, defined in options.py. (Including defaults).
    """
    pred_files = options.prediction_files
    model_names = options.model_names

    if len(pred_files) != len(model_names):
        raise Exception("prediction_files and model_names must have the same length")

    if not isdir(options.output_dir):
        mkdir_p(options.output_dir)

    curves = {}
    for i in range(len(model_names)):
        curves[model_names[i]] = compute_PCKh_curve(pred_files[i],
                                                    model_names[i])

    for key in curves[model_names[0]]:
        fig = plt.figure(figsize=(10.0, 10.0))
        for model in model_names:
            plt.plot(np.arange(0.0, 0.5, 0.01),
                     curves[model][key],
                     label=model)
        plt.legend()
        plt.xlabel("Threshold")
        plt.ylabel("% joints correct")
        plt.title("PCKh curves")

        # convert fig to a numpy array
        # see: https://stackoverflow.com/questions/7821518/matplotlib-save-plot-to-numpy-array
        fig.canvas.draw()
        data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)  # np.fromstring is deprecated
        data = data.reshape(fig.canvas.get_width_height()[::-1] + (3, ))

        # avoid unnecessary memory consumption
        plt.close(fig)

        # save
        filename = os.path.join(options.output_dir,
                                "graph_{joint}_PCKh.jpg".format(joint=key))
        scipy.misc.imsave(filename, data)
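If the intermediate numpy array is not needed elsewhere, matplotlib can write the file directly; an equivalent sketch of the save step above.

filename = os.path.join(options.output_dir,
                        "graph_{joint}_PCKh.jpg".format(joint=key))
fig.savefig(filename)  # write the figure straight to disk
plt.close(fig)         # still release the figure afterwards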
Example #7
def _save_preds(twod_predictions, threed_predictions, data_output_dir):
    """
    Save the PyTorch set of predictions to a file

    :param twod_predictions: The map object of 2D predictions that we wish to save
    :param threed_predictions: The map object of 3D predictions that we wish to save
    :param data_output_dir: The filename for the file to save
    """
    # Make the directory if it doesn't exist
    if not isdir(data_output_dir):
        mkdir_p(data_output_dir)

    # Just save the predictions in the correct place via PyTorch
    torch.save(twod_predictions, os.path.join(data_output_dir, "2dpreds"))
    torch.save(threed_predictions, os.path.join(data_output_dir, "3dpreds"))
Example #8
def visualize_2d_overlay(options):
    """
    Unpacks options and makes visualizations for 2d and 3d predictions.

    Images in the output from left to right are:
    1. Original image with 2D pose overlaid
    2. 3D prediction visualization

    Options that should be included:
    options.img_dir: the directory for the image
    options.twod_pose_estimations: a PyTorch file containing 2D pose estimations. Assumed to be a dict keyed by filenames
    options.output_dir: a directory to output each visualization to

    :param options: Options for the visualizations, defined in options.py. (Including defaults).
    """
    # Load the predictions and unpack options
    img_dir = options.img_dir
    twod_pose_preds = torch.load(options.twod_pose_estimations)
    output_dir = options.output_dir

    # Make dir for output if it doesn't exist
    if not isdir(output_dir):
        mkdir_p(output_dir)

    i = 0
    total = len(os.listdir(img_dir))

    # Produce a visualization for each input image, outputting to 'output_dir' with the same image name as input
    for filename in os.listdir(img_dir):
        if filename.endswith(".jpg") or filename.endswith(".png"):
            abs_filename = os.path.join(img_dir, filename)
            if filename not in twod_pose_preds:
                continue
            img = scipy.misc.imread(abs_filename)
            twod_overlay = viz_2d_overlay(img, twod_pose_preds[filename])
            scipy.misc.imsave(os.path.join(output_dir, filename), twod_overlay)

            # progress
            if i % 100 == 0:
                print("Visualized " + str(i) + " out of " + str(total))
            i += 1
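scipy.misc.imread and scipy.misc.imsave were removed in SciPy 1.2; on newer environments imageio is the usual substitute. A drop-in sketch for the two calls above:

import imageio

img = imageio.imread(abs_filename)
imageio.imwrite(os.path.join(output_dir, filename), twod_overlay)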
Example #9
def visualize_2d_pred_3d_gt_3d_pred(options):
    """
    Visualize the 2D and 3D pose estimations on matplotlib axes. This is just an interface for twod_threed's
    visualizations

    Options that should be included:
    options.twod_pose_ground_truths: a PyTorch file containing 2D pose ground truths.
    options.threed_pose_ground_truths: a PyTorch file containing 3D pose ground truths.
    options.threed_pose_estimations: a PyTorch file containing 3D pose estimations.
    options.output_dir: A directory to output each visualization to

    :param options: Options for the visualizations, defined in options.py. (Including defaults).
    """
    # Unpack options
    twod_pose_ground_truths = torch.load(options.twod_pose_ground_truths)
    threed_pose_ground_truths = torch.load(options.threed_pose_ground_truths)  # was missing here, though used below
    threed_pose_preds = torch.load(options.threed_pose_estimations)
    output_dir = options.output_dir

    # Make dir for output if it doesn't exist
    if not isdir(output_dir):
        mkdir_p(output_dir)

    i = 0
    total = len(twod_pose_ground_truths)

    # Loop through each pose (each item in the dict is an array, over time, of 2D poses)
    for k2d in twod_pose_ground_truths:
        k3d = get_3d_key_from_2d_key(k2d)
        for t in range(min(len(twod_pose_ground_truths[k2d]), 100)):
            twod_gt_viz = viz_2d_pose(twod_pose_ground_truths[k2d][t])
            threed_gt_viz = viz_3d_pose(threed_pose_ground_truths[k3d][t])
            threed_pred_viz = viz_3d_pose(threed_pose_preds[k2d][t].numpy())

            final_img = _pack_images([twod_gt_viz, threed_gt_viz, threed_pred_viz])
            scipy.misc.imsave(os.path.join(output_dir, str(k2d)+"_"+str(t)+".jpg"), final_img)

            # progress
            if i % 100 == 0:
                print("Visualized " + str(i) + " out of " + str(total))
            i += 1
Example #10
def main(args):
    # create model
    model = network.__dict__[cfg.model](cfg.output_shape, cfg.num_class, pretrained=False)
    model = torch.nn.DataParallel(model).cuda()

    test_loader = torch.utils.data.DataLoader(
        MscocoMulti(cfg, train=False),
        batch_size=args.batch * args.num_gpus, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # load training weights
    # checkpoint_file = os.path.join(args.checkpoint, args.test + '.pth.tar')
    checkpoint_file = os.path.join('model', 'checkpoint', 'epoch9checkpoint.pth.tar')
    checkpoint = torch.load(checkpoint_file)
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_file, checkpoint['epoch']))

    # change to evaluation mode
    model.eval()

    print('testing...')
    full_result = []
    for i, (inputs, meta) in tqdm(enumerate(test_loader)):
        with torch.no_grad():
            input_var = torch.autograd.Variable(inputs.cuda())
            if args.flip:
                flip_inputs = inputs.clone()
                for j, finp in enumerate(flip_inputs):  # j avoids shadowing the batch index i
                    finp = im_to_numpy(finp)
                    finp = cv2.flip(finp, 1)
                    flip_inputs[j] = im_to_torch(finp)
                flip_input_var = torch.autograd.Variable(flip_inputs.cuda())

            # compute output
            global_outputs, refine_output = model(input_var)
            score_map = refine_output.data.cpu()
            score_map = score_map.numpy()

            if args.flip:
                flip_global_outputs, flip_output = model(flip_input_var)
                flip_score_map = flip_output.data.cpu()
                flip_score_map = flip_score_map.numpy()

                for j, fscore in enumerate(flip_score_map):
                    fscore = fscore.transpose((1, 2, 0))
                    fscore = cv2.flip(fscore, 1)
                    fscore = list(fscore.transpose((2, 0, 1)))
                    for (q, w) in cfg.symmetry:
                        fscore[q], fscore[w] = fscore[w], fscore[q]
                    fscore = np.array(fscore)
                    score_map[j] += fscore
                    score_map[j] /= 2  # average the original and flipped heatmaps

            # ids = meta['imgID'].numpy()
            det_scores = meta['det_scores']
            for b in range(inputs.size(0)):
                details = meta['augmentation_details']
                imgid = meta['imgid'][b]
                # print(imgid)
                category = meta['category'][b]
                # print(category)
                single_result_dict = {}
                single_result = []

                single_map = score_map[b]
                r0 = single_map.copy()
                r0 /= 255
                r0 += 0.5
                v_score = np.zeros(24)
                for p in range(24):
                    single_map[p] /= np.amax(single_map[p])
                    border = 10
                    dr = np.zeros((cfg.output_shape[0] + 2 * border, cfg.output_shape[1] + 2 * border))
                    dr[border:-border, border:-border] = single_map[p].copy()
                    dr = cv2.GaussianBlur(dr, (21, 21), 0)
                    lb = dr.argmax()
                    y, x = np.unravel_index(lb, dr.shape)
                    dr[y, x] = 0
                    lb = dr.argmax()
                    py, px = np.unravel_index(lb, dr.shape)
                    y -= border
                    x -= border
                    py -= border + y
                    px -= border + x
                    ln = (px ** 2 + py ** 2) ** 0.5
                    delta = 0.25
                    if ln > 1e-3:
                        x += delta * px / ln
                        y += delta * py / ln
                    x = max(0, min(x, cfg.output_shape[1] - 1))
                    y = max(0, min(y, cfg.output_shape[0] - 1))
                    resy = float((4 * y + 2) / cfg.data_shape[0] * (details[b][3] - details[b][1]) + details[b][1])
                    resx = float((4 * x + 2) / cfg.data_shape[1] * (details[b][2] - details[b][0]) + details[b][0])
                    v_score[p] = float(r0[p, int(round(y) + 1e-10), int(round(x) + 1e-10)])
                    single_result.append(resx)
                    single_result.append(resy)
                    single_result.append(1)
                if len(single_result) != 0:
                    result = []
                    result.append(imgid)
                    result.append(category)
                    j = 0
                    while j < len(single_result):
                        result.append(str(int(single_result[j])) + '_' + str(int(single_result[j + 1])) + '_1')
                        j += 3
                    full_result.append(result)

    result_path = args.result
    if not isdir(result_path):
        mkdir_p(result_path)
    result_file = os.path.join(result_path, 'result.csv')
    with open(result_file, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerows(full_result)

    # point the evaluator at the file written above ('result9.csv' was never created by this script)
    Evaluator = FaiKeypoint2018Evaluator(userAnswerFile=result_file,
                                         standardAnswerFile="fashionAI_key_points_test_a_answer_20180426.csv")
    score = Evaluator.evaluate()

    print(score)

    Evaluator.writerror(result_path=os.path.join(result_path, "toperror1.csv"))
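The per-joint block above (pad, Gaussian blur, take the top two maxima, then shift the top peak a quarter pixel toward the runner-up) recurs in several of these examples; factored into a standalone helper with the same logic it reads as follows (the function name is new).

import cv2
import numpy as np

def refine_peak(heatmap, border=10, delta=0.25):
    # pad so the blur does not clip peaks at the border
    h, w = heatmap.shape
    dr = np.zeros((h + 2 * border, w + 2 * border))
    dr[border:-border, border:-border] = heatmap
    dr = cv2.GaussianBlur(dr, (21, 21), 0)
    # strongest peak, then the runner-up
    y, x = np.unravel_index(dr.argmax(), dr.shape)
    dr[y, x] = 0
    py, px = np.unravel_index(dr.argmax(), dr.shape)
    y -= border
    x -= border
    py -= border + y
    px -= border + x
    # shift a quarter pixel toward the second peak
    ln = (px ** 2 + py ** 2) ** 0.5
    if ln > 1e-3:
        x += delta * px / ln
        y += delta * py / ln
    return max(0, min(x, w - 1)), max(0, min(y, h - 1))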
Example #11
    # (this example begins mid-function; net, opt, and device are defined earlier in the source file)
    net.apply(weight_init)

    # load a pretrained model if required
    #net.load_state_dict(torch.load('path/to/pretrain.pth'))

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        net = nn.DataParallel(net)
    net.to(device)

    # choose an optimizer
    #optimizer = optim.Adam(net.parameters(), lr=opt.lr, weight_decay=opt.weight_decay)
    optimizer = optim.SGD(net.parameters(), lr=opt.lr, momentum=opt.momentum, weight_decay=opt.wd, nesterov=True)
    #optimizer = optim.RMSprop(net.parameters(), lr=opt.lr, momentum=opt.momentum, weight_decay=opt.weight_decay)

    # choose a learning rate scheduler
    #scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=opt.gamma)
    #scheduler = optim.lr_scheduler.LambdaLR(optimizer, lambda epoch: 0.95**epoch)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    #scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30,80], gamma=0.1)

    dataloaders = get_dataloaders(batch_size=opt.batch_size, num_workers=opt.workers)

    # create checkpoint dir
    #opt.output = f'{opt.output}'
    if not isdir(opt.output):
        mkdir_p(opt.output)

    # train model
    train_model(net, dataloaders, optimizer, scheduler, num_epochs=opt.epoch)
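StepLR only changes the learning rate when it is stepped; assuming train_model does not drive the scheduler internally, a typical epoch loop would look like the sketch below (train_one_epoch is a hypothetical helper).

for epoch in range(opt.epoch):
    train_one_epoch(net, dataloaders['train'], optimizer)  # hypothetical per-epoch trainer
    scheduler.step()  # decays the LR by gamma every step_size epochs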
Example #12
def _main_regression(opt):
    """
    Main training loop for the 3D baseline
    """
    start_epoch = 0
    err_best = 1000
    glob_step = 0
    lr_now = opt.lr

    # save options
    log.save_options(opt, opt.checkpoint_dir)

    # Make a summary writer
    writer = SummaryWriter(log_dir="%s/2d3d_h36m_%s_tb_log" % (opt.tb_dir, opt.exp))

    # create model
    print(">>> creating model")
    model = LinearModel(dataset_normalized_input=opt.dataset_normalization)
    model = model.cuda()
    model.apply(weight_init)
    print(">>> total params: {:.2f}M".format(sum(p.numel() for p in model.parameters()) / 1000000.0))
    criterion = nn.MSELoss(reduction='mean').cuda()  # size_average=True is the deprecated spelling
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)

    # load ckpt
    if opt.load:
        print(">>> loading ckpt from '{}'".format(opt.load))
        ckpt = torch.load(opt.load)
        start_epoch = ckpt['epoch']
        err_best = ckpt['err']
        glob_step = ckpt['step']
        lr_now = ckpt['lr']
        model.load_state_dict(ckpt['state_dict'])
        optimizer.load_state_dict(ckpt['optimizer'])
        print(">>> ckpt loaded (epoch: {} | err: {})".format(start_epoch, err_best))
    if opt.resume:
        logger = log.Logger(os.path.join(opt.checkpoint_dir, 'log.txt'), resume=True)
    else:
        logger = log.Logger(os.path.join(opt.checkpoint_dir, 'log.txt'))
        logger.set_names(['epoch', 'lr', 'loss_train', 'loss_test', 'err_test'])

    # list of action(s)
    actions = misc.define_actions(opt.action)
    num_actions = len(actions)
    print(">>> actions to use (total: {}):".format(num_actions))
    pprint(actions, indent=4)
    print(">>>")
    # data loading
    print(">>> loading data")
    # load datasets for training
    train_dataset, train_loader, test_loader = _make_torch_data_loaders(opt, actions)
    stat_3d = train_dataset.get_stat_3d()
    print(">>> data loaded !")

    cudnn.benchmark = True
    for epoch in range(start_epoch, opt.epochs):
        print('==========================')
        print('>>> epoch: {} | lr: {:.5f}'.format(epoch + 1, lr_now))
        # per epoch
        glob_step, lr_now, loss_train = _train(
            train_loader, model, criterion, optimizer, writer,
            lr_init=opt.lr, lr_now=lr_now, glob_step=glob_step, lr_decay=opt.lr_decay, gamma=opt.lr_gamma,
            no_grad_clipping=opt.no_grad_clipping, grad_clip=opt.grad_clip, tb_log_freq=opt.tb_log_freq,
            use_horovod=opt.use_horovod)
        loss_test, err_test = _test(test_loader, model, criterion, opt.dataset_normalization, procrustes=opt.procrustes)

        # Update tensorboard summaries
        writer.add_scalars('data/epoch/loss', {'train_loss': loss_train, 'test_loss': loss_test}, epoch)
        writer.add_scalar('data/epoch/validation_error', err_test, epoch)

        # update log file
        logger.append([epoch + 1, lr_now, loss_train, loss_test, err_test],
                      ['int', 'float', 'float', 'float', 'float'])

        # save ckpt
        model_specific_checkpoint_dir = "%s/2d3d_h36m_%s" % (opt.checkpoint_dir, opt.exp)
        if not isdir(model_specific_checkpoint_dir):
            mkdir_p(model_specific_checkpoint_dir)
        is_best = err_test < err_best
        err_best = min(err_test, err_best)
        if is_best:
            log.save_ckpt({'epoch': epoch + 1,
                           'lr': lr_now,
                           'step': glob_step,
                           'err': err_best,
                           'state_dict': model.state_dict(),
                           'optimizer': optimizer.state_dict()},
                          ckpt_path=model_specific_checkpoint_dir,
                          is_best=True)
        log.save_ckpt({'epoch': epoch + 1,
                       'lr': lr_now,
                       'step': glob_step,
                       'err': err_best,
                       'state_dict': model.state_dict(),
                       'optimizer': optimizer.state_dict()},
                      ckpt_path=model_specific_checkpoint_dir,
                      is_best=False)
    logger.close()
    writer.close()
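log.save_ckpt is project-specific; below is a minimal sketch consistent with how it is called above (the filenames are assumptions).

import os
import shutil
import torch

def save_ckpt(state, ckpt_path, is_best=False):
    # always write the latest checkpoint, and mirror it to a stable
    # 'best' file when this epoch improved the validation error
    path = os.path.join(ckpt_path, 'ckpt_last.pth.tar')
    torch.save(state, path)
    if is_best:
        shutil.copyfile(path, os.path.join(ckpt_path, 'ckpt_best.pth.tar'))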
Example #13
def main(args):
    # create model
    model = network.__dict__[cfg.model](cfg.output_shape,
                                        cfg.num_class,
                                        pretrained=False)
    model = torch.nn.DataParallel(model).cuda()

    test_loader = torch.utils.data.DataLoader(MscocoMulti(cfg, train=False),
                                              batch_size=args.batch *
                                              args.num_gpus,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True)

    # load training weights
    checkpoint_file = os.path.join(args.checkpoint, args.test + '.pth.tar')
    checkpoint = torch.load(checkpoint_file)
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(
        checkpoint_file, checkpoint['epoch']))

    # change to evaluation mode
    model.eval()

    print('testing...')
    full_result = []
    for i, (inputs, meta) in tqdm(enumerate(test_loader)):
        with torch.no_grad():
            input_var = torch.autograd.Variable(inputs.cuda())
            if args.flip:
                flip_inputs = inputs.clone()
                for j, finp in enumerate(flip_inputs):
                    finp = im_to_numpy(finp)
                    finp = cv2.flip(finp, 1)
                    flip_inputs[j] = im_to_torch(finp)
                flip_input_var = torch.autograd.Variable(flip_inputs.cuda())

            # compute output; score_map has shape (batch, num_class, 64, 48)
            global_outputs, refine_output = model(input_var)
            score_map = refine_output.data.cpu()
            score_map = score_map.numpy()

            if args.flip:
                flip_global_outputs, flip_output = model(flip_input_var)
                flip_score_map = flip_output.data.cpu()
                flip_score_map = flip_score_map.numpy()

                for j, fscore in enumerate(flip_score_map):
                    fscore = fscore.transpose((1, 2, 0))
                    fscore = cv2.flip(fscore, 1)
                    fscore = list(fscore.transpose((2, 0, 1)))
                    # note: unlike the other examples, the symmetry channel swap is not applied here
                    fscore = np.array(fscore)
                    score_map[j] += fscore
                    score_map[j] /= 2  # average the original and flipped heatmaps

            ids = meta['imgID'].numpy()
            imgclass = meta['class']
            det_scores = meta['det_scores']
            for b in range(inputs.size(0)):
                details = meta['augmentation_details']
                single_result_dict = {}
                single_result = []

                single_map = score_map[b]  # (num_class, 64, 48)
                r0 = single_map.copy()
                r0 /= 255
                r0 += 0.5
                v_score = np.zeros(10)
                if imgclass[b] == 'chair':
                    c = 0
                elif imgclass[b] == 'bed':
                    c = 1
                elif imgclass[b] == 'sofa':
                    c = 2
                else:
                    raise ValueError("unexpected class: %s" % imgclass[b])  # avoid reusing a stale c

                single_map[c] /= np.amax(single_map[c])
                border = 9
                ps = parseHeatmap(single_map[c], thresh=0.20)  # tuple of (row, col) index arrays
                for k in range(len(ps[0])):
                    x = ps[0][k] - border  # row (height axis)
                    y = ps[1][k] - border  # column (width axis)
                    # map heatmap coordinates back to the original image via the augmentation details
                    resy = float((4 * x + 2) / cfg.data_shape[0] *
                                 (details[b][3] - details[b][1]) +
                                 details[b][1])
                    resx = float((4 * y + 2) / cfg.data_shape[1] *
                                 (details[b][2] - details[b][0]) +
                                 details[b][0])
                    single_result.append(resx)
                    single_result.append(resy)
                    single_result.append(1)
                if len(single_result) != 0:
                    single_result_dict['image_id'] = int(ids[b])
                    single_result_dict['class'] = imgclass[b]
                    single_result_dict['keypoints'] = single_result
                    #     single_result_dict['score'] = float(det_scores[b])*v_score.mean()
                    full_result.append(single_result_dict)

    result_path = args.result
    if not isdir(result_path):
        mkdir_p(result_path)
    result_file = os.path.join(result_path, 'result.json')
    with open(result_file, 'w') as wf:
        json.dump(full_result, wf)
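parseHeatmap is not shown; below is a plausible sketch that returns the (row, col) index arrays of local maxima above a threshold, which matches how ps[0] and ps[1] are consumed above (the 4-neighbour test is an assumption).

import numpy as np

def parseHeatmap(heatmap, thresh=0.2):
    # a pixel is a peak if it beats the threshold and all four neighbours
    pad = np.pad(heatmap, 1, mode='constant')
    center = pad[1:-1, 1:-1]
    peaks = ((center > thresh)
             & (center >= pad[:-2, 1:-1]) & (center >= pad[2:, 1:-1])
             & (center >= pad[1:-1, :-2]) & (center >= pad[1:-1, 2:]))
    return np.nonzero(peaks)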
Example #14
def main():
    args = parse_args()
    update_config(cfg_hrnet, args)

    # create checkpoint dir
    if not isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # create model
    #print('networks.'+ cfg_hrnet.MODEL.NAME+'.get_pose_net')
    model = eval('models.' + cfg_hrnet.MODEL.NAME + '.get_pose_net')(
        cfg_hrnet, is_train=True)
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # show net
    args.channels = 3
    args.height = cfg.data_shape[0]
    args.width = cfg.data_shape[1]
    #net_vision(model, args)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.MSELoss(reduction='mean').cuda()

    #torch.optim.Adam
    optimizer = AdaBound(model.parameters(),
                         lr=cfg.lr,
                         weight_decay=cfg.weight_decay)

    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            pretrained_dict = checkpoint['state_dict']
            model.load_state_dict(pretrained_dict)
            args.start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            logger = Logger(join(args.checkpoint, 'log.txt'), resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        logger = Logger(join(args.checkpoint, 'log.txt'))
        logger.set_names(['Epoch', 'LR', 'Train Loss'])

    cudnn.benchmark = True
    torch.backends.cudnn.enabled = True
    print('    Total params: %.2fMB' %
          (sum(p.numel() for p in model.parameters()) / (1024 * 1024) * 4))

    train_loader = torch.utils.data.DataLoader(
        #MscocoMulti(cfg),
        KPloader(cfg),
        batch_size=cfg.batch_size * len(args.gpus))
    # shuffle=True, num_workers=args.workers, and pin_memory=True were left disabled here

    #for i, (img, targets, valid) in enumerate(train_loader):
    #    print(i, img, targets, valid)

    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, cfg.lr_dec_epoch,
                                  cfg.lr_gamma)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))

        # train for one epoch
        train_loss = train(train_loader, model, criterion, optimizer)
        print('train_loss: ', train_loss)

        # append logger file
        logger.append([epoch + 1, lr, train_loss])

        save_model(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            },
            checkpoint=args.checkpoint)

    logger.close()
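AdaBound is not part of torch.optim; assuming the published third-party package, it is installed and imported as in the sketch below.

# pip install adabound
from adabound import AdaBound

optimizer = AdaBound(model.parameters(), lr=cfg.lr,
                     weight_decay=cfg.weight_decay)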
Example #15
def main():
    args = parse_args()

    # create checkpoint dir
    if not isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # create model
    model = network.__dict__[cfg.model](cfg.channel_settings,
                                        cfg.output_shape,
                                        cfg.num_class,
                                        pretrained=True)

    # show net
    args.channels = 3
    args.height = cfg.data_shape[0]
    args.width = cfg.data_shape[1]
    #net_vision(model, args)

    if args.resume:  # 'if 1:' in the original left lr and logger undefined when no checkpoint was found
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            args.start_epoch = checkpoint['epoch']
            lr = checkpoint['lr']
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            logger = Logger(join(args.checkpoint, 'log.txt'), resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        lr = cfg.lr
        logger = Logger(join(args.checkpoint, 'log.txt'))
        logger.set_names(['Epoch', 'LR', 'Train Loss'])

    # define loss function (criterion) and optimizer
    criterion1 = torch.nn.MSELoss().cuda()  # for Global loss
    criterion2 = torch.nn.MSELoss(reduction='none').cuda()  # for refine loss ('reduce=False' is deprecated)

    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    cudnn.benchmark = True
    torch.backends.cudnn.enabled = True
    print('    Total params: %.2fMB' %
          (sum(p.numel() for p in model.parameters()) / (1024 * 1024) * 4))

    train_loader = torch.utils.data.DataLoader(
        #MscocoMulti(cfg),
        KPloader(cfg),
        batch_size=cfg.batch_size * len(args.gpus))
    # shuffle=True, num_workers=args.workers, and pin_memory=True were left disabled here

    #torch.optim.Adam
    optimizer = AdaBound(model.parameters(),
                         lr=lr,
                         weight_decay=cfg.weight_decay)

    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, cfg.lr_dec_epoch,
                                  cfg.lr_gamma)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))

        # train for one epoch
        train_loss = train(train_loader, model, [criterion1, criterion2],
                           optimizer)
        print('train_loss: ', train_loss)

        # append logger file
        logger.append([epoch + 1, lr, train_loss])

        #save_model({
        #    'epoch': epoch + 1,
        #    'state_dict': model.state_dict(),
        #    'optimizer' : optimizer.state_dict(),
        #}, checkpoint=args.checkpoint)

        state_dict = model.module.state_dict()
        for key in state_dict.keys():
            state_dict[key] = state_dict[key].cpu()
        torch.save({
            'epoch': epoch + 1,
            'state_dict': state_dict,
            'lr': lr,
        },
                   os.path.join(args.checkpoint,
                                "epoch" + str(epoch + 1) + "checkpoint.ckpt"))
        print("=> Save model done! the path: ", \
              os.path.join(args.checkpoint, "epoch" + str(epoch + 1) + "checkpoint.ckpt"))

    logger.close()
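The per-epoch .ckpt written above stores a CPU state_dict without the DataParallel 'module.' prefix; a sketch of resuming from it (the epoch number is illustrative).

ckpt = torch.load(os.path.join(args.checkpoint, "epoch1checkpoint.ckpt"))
model.module.load_state_dict(ckpt['state_dict'])  # load into the wrapped model
args.start_epoch, lr = ckpt['epoch'], ckpt['lr']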
Example #16
def test(test_loader, model):
    model.eval()

    print('testing...')
    full_result = []
    flip = True
    for i, (inputs, meta) in tqdm(enumerate(test_loader)):
        with torch.no_grad():
            input_var = torch.autograd.Variable(inputs.cuda())
            if flip:
                flip_inputs = inputs.clone()
                for j, finp in enumerate(flip_inputs):
                    finp = im_to_numpy(finp)
                    finp = cv2.flip(finp, 1)
                    flip_inputs[j] = im_to_torch(finp)
                flip_input_var = torch.autograd.Variable(flip_inputs.cuda())

            # compute output
            global_outputs, refine_output = model(input_var)
            score_map = refine_output.data.cpu()
            score_map = score_map.numpy()

            if flip:
                flip_global_outputs, flip_output = model(flip_input_var)
                flip_score_map = flip_output.data.cpu()
                flip_score_map = flip_score_map.numpy()

                for j, fscore in enumerate(flip_score_map):
                    fscore = fscore.transpose((1, 2, 0))
                    fscore = cv2.flip(fscore, 1)
                    fscore = list(fscore.transpose((2, 0, 1)))
                    for (q, w) in test_cfg.symmetry:
                        fscore[q], fscore[w] = fscore[w], fscore[q]
                    fscore = np.array(fscore)
                    score_map[j] += fscore
                    score_map[j] /= 2

            ids = meta['imgID'].numpy()
            det_scores = meta['det_scores']
            for b in range(inputs.size(0)):
                details = meta['augmentation_details']
                single_result_dict = {}
                single_result = []

                single_map = score_map[b]
                r0 = single_map.copy()
                r0 /= 255
                r0 += 0.5
                v_score = np.zeros(17)
                for p in range(17):
                    single_map[p] /= np.amax(single_map[p])
                    border = 10
                    dr = np.zeros((test_cfg.output_shape[0] + 2 * border,
                                   test_cfg.output_shape[1] + 2 * border))
                    dr[border:-border, border:-border] = single_map[p].copy()
                    dr = cv2.GaussianBlur(dr, (21, 21), 0)
                    lb = dr.argmax()
                    y, x = np.unravel_index(lb, dr.shape)
                    dr[y, x] = 0
                    lb = dr.argmax()
                    py, px = np.unravel_index(lb, dr.shape)
                    y -= border
                    x -= border
                    py -= border + y
                    px -= border + x
                    ln = (px**2 + py**2)**0.5
                    delta = 0.25
                    if ln > 1e-3:
                        x += delta * px / ln
                        y += delta * py / ln
                    x = max(0, min(x, test_cfg.output_shape[1] - 1))
                    y = max(0, min(y, test_cfg.output_shape[0] - 1))
                    resy = float((4 * y + 2) / test_cfg.data_shape[0] *
                                 (details[b][3] - details[b][1]) +
                                 details[b][1])
                    resx = float((4 * x + 2) / test_cfg.data_shape[1] *
                                 (details[b][2] - details[b][0]) +
                                 details[b][0])
                    v_score[p] = float(r0[p,
                                          int(round(y) + 1e-10),
                                          int(round(x) + 1e-10)])
                    single_result.append(resx)
                    single_result.append(resy)
                    single_result.append(1)
                if len(single_result) != 0:
                    single_result_dict['image_id'] = int(ids[b])
                    single_result_dict['category_id'] = 1
                    single_result_dict['keypoints'] = single_result
                    single_result_dict['score'] = float(
                        det_scores[b]) * v_score.mean()
                    full_result.append(single_result_dict)

    result_path = 'result'
    if not isdir(result_path):
        mkdir_p(result_path)
    result_file = os.path.join(result_path, 'result.json')
    with open(result_file, 'w') as wf:
        json.dump(full_result, wf)

    # evaluate on COCO
    eval_gt = COCO(test_cfg.ori_gt_path)
    eval_dt = eval_gt.loadRes(result_file)
    cocoEval = COCOeval(eval_gt, eval_dt, iouType='keypoints')
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
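The flip-test merge above (mirror each heatmap, swap the symmetric channel pairs, average with the original) can be factored into a helper; the sketch below restates the same logic under a new name.

import cv2
import numpy as np

def merge_flip_heatmaps(score_map, flip_score_map, symmetry):
    # mirror each flipped heatmap back, swap left/right channel pairs,
    # then average with the unflipped predictions in place
    for j, fscore in enumerate(flip_score_map):
        fscore = cv2.flip(fscore.transpose((1, 2, 0)), 1).transpose((2, 0, 1))
        fscore = list(fscore)
        for q, w in symmetry:
            fscore[q], fscore[w] = fscore[w], fscore[q]
        score_map[j] = (score_map[j] + np.array(fscore)) / 2
    return score_map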
Example #17
def visualize_saliency_and_prob_maps(options, skeleton_overlay=False):
    """
    This is a visualization of 2D joint predictions.

    For each joint we will produce a row of images:
    Original Image, Joint Prediction, Saliency Map, Overlaid Saliency Map

    Then for each image, we will produce one row per joint and stack them into one big column

    Options that should be included:
    options.img_dir: directory for the image(s)
    options.load: specifies the location of the saved (hourglass) model
    options.output_dir: specifies the location to save the (torch dataset of) pose predictions
    <any other model specific options you specified for training, e.g. --use_layer_norm, or --stacks 4>

    :param options: The options used to specify where to load images from and where to save the output etc
    :param skeleton_overlay: If we should overlay the image with a skeleton
    :return: Nothing
    """
    upsample_4x = torch.nn.Upsample(scale_factor=4)

    # if the output directory doesn't exist, make it
    if not isdir(options.output_dir):
        mkdir_p(options.output_dir)

    # Load model (use helper from StackedHourglass' run.py)
    model, dataset = _load_model_and_dataset(options.load, options.img_dir, options)

    # Iterate through every image
    for i in range(len(dataset)):
        # Progress
        print("At " + str(i) + " out of " + str(len(dataset)) + ".")

        # Get the image, the ground truths and data about the image map
        inputs, targets, meta = dataset[i]
        filename = dataset.anno[dataset.train[i]]['img_paths']

        # Wrap input in a variable and set that it requires a gradient (so we can actually get gradient info)
        inputs_var = torch.autograd.Variable(inputs.unsqueeze(0))
        inputs_var.requires_grad_()

        # Run the model to get the output predictions
        output = model(inputs_var.cuda())
        score_map = output[-1].cpu().data
        joint_preds = final_preds(score_map, [meta['center']], [meta['scale']], [64, 64]).squeeze()

        # Recover the original image: "inputs" is color-normalized, so denormalize and scale to [0,255],
        # then transpose from [C,W,H] to [W,H,C]. If we have joint predictions, overlay them as well.
        original_image = inputs.clone()
        color_denormalize(original_image, dataset.mean, dataset.std)
        original_image = original_image.numpy().transpose(1,2,0) * 255.0

        # If we have skeleton information, then, add the original image with skeleton overlay
        abs_filename = os.path.join(options.img_dir, filename)
        img = scipy.misc.imread(abs_filename)
        twod_overlay = viz_2d_overlay(img, joint_preds)

        # The network output is a list of score stacks and we only want the final set of scores,
        # so reuse the forward pass from above. Output is of shape [1,num_joints,64,64]; squeeze and upsample.
        scores = output[-1].cpu().squeeze()
        scores_upsampled = upsample_4x(scores.unsqueeze(0)).squeeze()

        # Saliency map is the gradient of the scores with respect to the scores. We want to do this one joint at a time
        packed_joint_imgs = []
        for joint in range(model.num_classes):
            # Compute saliency, i.e. the gradient of the scores w.r.t. the input image
            joint_scores = scores[joint]
            if options.use_max_for_saliency_map:
                joint_scores_sum_or_max = torch.max(joint_scores)
            else:
                joint_scores_sum_or_max = torch.sum(joint_scores)
            joint_scores_sum_or_max.backward(retain_graph=True)
            saliency = torch.sum(inputs_var.grad.abs().squeeze(), dim=0)

            # Zero out any gradients (for next iteration)
            inputs_var.grad.zero_()
            model.zero_grad()

            # Create the images to stack (the prob dist is just [W,H] in shape, not [C,W,H]).
            # Transpose probabilities and saliency maps, because matplotlib takes y,x rather than x,y coords.
            joint_prob_distr_image = _heatmap_from_prob_scores(scores_upsampled[joint].detach().numpy())
            saliency_image = _heatmap_from_prob_scores(saliency.numpy(), colormap=plt.cm.hot)
            saliency_overlay = _overlay_saliency(original_image, saliency.numpy())

            # Stack these images in a row
            imgs = []
            if skeleton_overlay and twod_overlay is not None: imgs.append(twod_overlay)
            imgs.extend([original_image, joint_prob_distr_image, saliency_image, saliency_overlay])
            packed_joint_imgs.append(_pack_images(imgs))

        # Stack images in a collumn and save
        final_visualization = _pack_images_col(packed_joint_imgs)
        output_filename = os.path.join(options.output_dir, filename)
        scipy.misc.imsave(output_filename, final_visualization)
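Stripped of the visualization plumbing, the saliency computation above reduces to the standard gradient-of-score pattern; a minimal sketch, where model and image stand in for any stacked-hourglass network and input tensor.

inputs = image.unsqueeze(0).requires_grad_()         # [1, C, H, W], tracking gradients
score = model(inputs)[-1].sum()                      # scalar score over the final stack
score.backward()
saliency = inputs.grad.abs().squeeze(0).sum(dim=0)   # [H, W] saliency map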
Example #18
def main(args):
    """
    Main training loop for training a stacked hourglass model on MPII dataset.
    :param args: Command line arguments.
    """
    global best_acc

    # create checkpoint dir
    if not isdir(args.checkpoint_dir):
        mkdir_p(args.checkpoint_dir)

    # create model
    print("==> creating model '{}', stacks={}, blocks={}".format(
        args.arch, args.stacks, args.blocks))
    model = HourglassNet(num_stacks=args.stacks,
                         num_blocks=args.blocks,
                         num_classes=args.num_classes,
                         batch_norm_momentum=args.batch_norm_momentum,
                         use_layer_norm=args.use_layer_norm,
                         width=256,
                         height=256)
    joint_visibility_model = JointVisibilityNet(hourglass_stacks=args.stacks)

    # scale weights
    if args.scale_weight_factor != 1.0:
        model.scale_weights_(args.scale_weight_factor)

    # setup horovod and model for parallel execution
    if args.use_horovod:
        hvd.init()
        torch.cuda.set_device(hvd.local_rank())
        args.lr *= hvd.size()
        model.cuda()
    else:
        model = model.cuda()
        if args.predict_joint_visibility:
            joint_visibility_model = joint_visibility_model.cuda()

    # define loss function (criterion) and optimizer
    criterion = torch.nn.MSELoss(reduction='mean').cuda()  # size_average=True is the deprecated spelling
    joint_visibility_criterion = None if not args.predict_joint_visibility else torch.nn.BCEWithLogitsLoss()
    params = [{'params': model.parameters(), 'lr': args.lr}]
    if args.predict_joint_visibility:
        params.append({
            'params': joint_visibility_model.parameters(),
            'lr': args.lr
        })
    # note: do not overwrite params with model.parameters() here, or the
    # joint-visibility parameters added above would never be optimized
    if not args.use_amsprop:
        optimizer = torch.optim.RMSprop(params,
                                        lr=args.lr,
                                        momentum=args.momentum,
                                        weight_decay=args.weight_decay)
    else:
        optimizer = torch.optim.Adam(params,
                                     lr=args.lr,
                                     weight_decay=args.weight_decay,
                                     amsgrad=True)
    if args.use_horovod:
        optimizer = hvd.DistributedOptimizer(
            optimizer, named_parameters=model.named_parameters())

    # Create a tensorboard writer
    writer = SummaryWriter(log_dir="%s/hourglass_mpii_%s_tb_log" %
                           (args.tb_dir, args.exp))

    # optionally resume from a checkpoint
    title = 'mpii-' + args.arch
    if args.load:
        if isfile(args.load):
            print("=> loading checkpoint '{}'".format(args.load))
            checkpoint = torch.load(args.load)

            # remove old usage of data parallel (used to be wrapped around model) # TODO: remove this when no old models used this
            state_dict = {}
            for key in checkpoint['state_dict']:
                new_key = key[len("module."):] if key.startswith(
                    "module.") else key
                state_dict[new_key] = checkpoint['state_dict'][key]

            # restore state
            args.start_epoch = checkpoint['epoch']
            best_acc = checkpoint['best_acc']
            model.load_state_dict(state_dict)
            if args.predict_joint_visibility:
                joint_visibility_model.load_state_dict(
                    checkpoint['joint_visibility_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])

            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.load, checkpoint['epoch']))
            logger = Logger(join(args.checkpoint_dir, 'log.txt'),
                            title=title,
                            resume=True)
        else:
            raise Exception("=> no checkpoint found at '{}'".format(args.load))
    else:
        logger = Logger(join(args.checkpoint_dir, 'log.txt'), title=title)
        logger.set_names(
            ['Epoch', 'LR', 'Train Loss', 'Val Loss', 'Train Acc', 'Val Acc'])

    cudnn.benchmark = True
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    # Data loading code
    train_dataset, train_loader, val_loader = _make_torch_data_loaders(args)

    if args.evaluate:
        print('\nEvaluation only')
        loss, acc, predictions = validate(val_loader, model, criterion,
                                          args.num_classes, args.debug,
                                          args.flip)
        save_pred(predictions, checkpoint=args.checkpoint_dir)
        return

    lr = args.lr
    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, lr, args.schedule,
                                  args.gamma)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))

        # decay sigma
        if args.sigma_decay > 0:
            train_loader.dataset.sigma *= args.sigma_decay
            val_loader.dataset.sigma *= args.sigma_decay

        # train for one epoch
        train_loss, train_acc, joint_visibility_loss, joint_visibility_acc = train(
            train_loader,
            model=model,
            joint_visibility_model=joint_visibility_model,
            criterion=criterion,
            num_joints=args.num_classes,
            joint_visibility_criterion=joint_visibility_criterion,
            optimizer=optimizer,
            epoch=epoch,
            writer=writer,
            lr=lr,
            debug=args.debug,
            flip=args.flip,
            remove_intermediate_supervision=args.remove_intermediate_supervision,
            tb_freq=args.tb_log_freq,
            no_grad_clipping=args.no_grad_clipping,
            grad_clip=args.grad_clip,
            use_horovod=args.use_horovod,
            predict_joint_visibility=args.predict_joint_visibility,
            predict_joint_loss_coeff=args.joint_visibility_loss_coeff)

        # evaluate on validation set
        valid_loss, valid_acc_PCK, valid_acc_PCKh, valid_acc_PCKh_per_joint, valid_joint_visibility_loss, valid_joint_visibility_acc, predictions = validate(
            val_loader, model, joint_visibility_model, criterion,
            joint_visibility_criterion, args.num_classes, args.debug,
            args.flip, args.use_horovod, args.use_train_mode_to_eval,
            args.predict_joint_visibility)

        # append logger file, and write to tensorboard summaries
        writer.add_scalars('data/epoch/losses_wrt_epochs', {
            'train_loss': train_loss,
            'test_loss': valid_loss
        }, epoch)
        writer.add_scalar('data/epoch/train_accuracy_PCK', train_acc, epoch)
        writer.add_scalar('data/epoch/test_accuracy_PCK', valid_acc_PCK, epoch)
        writer.add_scalar('data/epoch/test_accuracy_PCKh', valid_acc_PCKh,
                          epoch)
        for key in valid_acc_PCKh_per_joint:
            writer.add_scalar(
                'per_joint_data/epoch/test_accuracy_PCKh_%s' % key,
                valid_acc_PCKh_per_joint[key], epoch)
        logger.append(
            [epoch + 1, lr, train_loss, valid_loss, train_acc, valid_acc_PCK])
        if args.predict_joint_visibility:
            writer.add_scalars(
                'joint_visibility/epoch/loss', {
                    'train': joint_visibility_loss,
                    'test': valid_joint_visibility_loss
                }, epoch)
            writer.add_scalars(
                'joint_visibility/epoch/acc', {
                    'train': joint_visibility_acc,
                    'test': valid_joint_visibility_acc
                }, epoch)

        # remember best acc and save checkpoint
        model_specific_checkpoint_dir = "%s/hourglass_mpii_%s" % (
            args.checkpoint_dir, args.exp)
        if not isdir(model_specific_checkpoint_dir):
            mkdir_p(model_specific_checkpoint_dir)

        is_best = valid_acc_PCK > best_acc
        best_acc = max(valid_acc_PCK, best_acc)
        mean, stddev = train_dataset.get_mean_stddev()
        checkpoint = {
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict(),
            'mean': mean,
            'stddev': stddev,
        }
        if args.predict_joint_visibility:
            checkpoint[
                'joint_visibility_state_dict'] = joint_visibility_model.state_dict(
                )
        save_checkpoint(checkpoint,
                        predictions,
                        is_best,
                        checkpoint=model_specific_checkpoint_dir)

    logger.close()
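The no_grad_clipping and grad_clip arguments presumably gate a standard clipping step inside train(); a sketch of that step using PyTorch's built-in utility.

loss.backward()
if not args.no_grad_clipping:
    torch.nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
optimizer.step()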
Example #19
def viz_orthog_transform(options):
    """
    Visualize a pose's 2D input, 3D ground truth, and 3D prediction under several random orthogonal re-orientations.

    Options that should be included:
    options.data_dir: the directory for the dataset of poses
    options.load: checkpoint file for the model
    options.index: the index into the dataset to visualize
    options.num_orientations: the number of re-orientations to make
    options.dataset_normalization: if the network was trained with dataset normalizations
    options.output_dir: the directory to output the visualization image

    :param options: Options for the visualization, defined in options.py.
    """
    # Unpack options
    data_dir = options.data_dir
    model_checkpoint_file = options.load
    index = options.index
    num_orientations = options.num_orientations
    dataset_normalize = options.dataset_normalization

    # Make output dir
    if not isdir(options.output_dir):
        mkdir_p(options.output_dir)

    # Make the dataset object, and load the model, and put it in eval mode
    dataset = Human36mDataset(dataset_path=data_dir, orthogonal_data_augmentation_prob=0.0,
                              z_rotations_only=options.z_rotations_only, dataset_normalization=dataset_normalize)
    model = LinearModel(dataset_normalized_input=dataset_normalize).cuda()
    ckpt = torch.load(model_checkpoint_file)
    model.load_state_dict(ckpt['state_dict'])
    model.eval()

    # Loop
    vstack = []
    for i in range(num_orientations):
        # Get the data from the dataset
        _, _, pose_2d_gt, pose_3d_gt, meta = dataset[index]

        # Run the model to get the prediction (put it in a 'pseudo batch' of size 1)
        pose_3d_pred = model(torch.Tensor(pose_2d_gt).view((1,-1)).cuda()).cpu().detach().numpy()

        # Un-normalize the poses (adding and removing the phantom batch dimension as needed)
        pose_2d_gt = np.expand_dims(pose_2d_gt, axis=0)
        pose_3d_gt = np.expand_dims(pose_3d_gt, axis=0)
        pose_2d_gt_unnorm = data_utils.unNormalizeData(pose_2d_gt, meta, dataset_normalize, is_2d=True)[0]
        pose_3d_gt_unnorm = data_utils.unNormalizeData(pose_3d_gt, meta, dataset_normalize)[0]
        pose_3d_pred_unnorm = data_utils.unNormalizeData(pose_3d_pred, meta, dataset_normalize)[0]

        # Visualize in a hstack
        pose_2d_gt_img = viz_2d_pose(pose_2d_gt_unnorm)
        pose_3d_gt_img = viz_3d_pose(pose_3d_gt_unnorm)
        # pose_3d_gt_img = viz_3d_pose(meta['3d_pose_camera_coords'])
        # pose_3d_pred_img = viz_3d_pose(meta['3d_pose_camera_coords'])
        pose_3d_pred_img = viz_3d_pose(pose_3d_pred_unnorm)

        # If it's the first iteration, now switch to using the orthogonal data augmentation
        if i == 0: dataset.orthogonal_data_augmentation_prob = 1.0

        # Append hstacked image to vstack
        vstack.append(_pack_images([pose_2d_gt_img, pose_3d_gt_img, pose_3d_pred_img]))

    # Compute the vstacked image and save it
    final_visualization = _pack_images_col(vstack)
    output_filename = os.path.join(options.output_dir, "{index}.jpg".format(index=index))
    # scipy.misc.imsave was removed in SciPy >= 1.2; imageio.imwrite is the usual replacement
    imageio.imwrite(output_filename, final_visualization)
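# The _pack_images / _pack_images_col helpers used above are not shown in this
# example. A minimal sketch of what they might look like (an assumption, not the
# original helpers), treating each panel as a numpy image array:
import numpy as np

def _pack_images(images):
    # Hypothetical: hstack a row of panels, padding each to a common height
    h = max(img.shape[0] for img in images)
    padded = [np.pad(img, ((0, h - img.shape[0]),) + ((0, 0),) * (img.ndim - 1))
              for img in images]
    return np.concatenate(padded, axis=1)

def _pack_images_col(rows):
    # Hypothetical: vstack the rows, padding each to a common width
    w = max(img.shape[1] for img in rows)
    padded = [np.pad(img, ((0, 0), (0, w - img.shape[1])) + ((0, 0),) * (img.ndim - 2))
              for img in rows]
    return np.concatenate(padded, axis=0)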
Exemplo n.º 20
0
def main(args):
    # create model
    model = network.__dict__[cfg.model](cfg.output_shape,
                                        cfg.num_class,
                                        pretrained=False)
    model = torch.nn.DataParallel(model).cuda()

    test_loader = torch.utils.data.DataLoader(MscocoMulti(cfg, train=False),
                                              batch_size=args.batch *
                                              args.num_gpus,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True)

    # load training weights
    checkpoint_file = os.path.join(args.checkpoint, args.test + '.pth.tar')
    checkpoint = torch.load(checkpoint_file)
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(
        checkpoint_file, checkpoint['epoch']))

    # change to evaluation mode
    model.eval()

    print('testing...')
    full_result = []
    for i, (inputs, meta) in tqdm(enumerate(test_loader)):
        with torch.no_grad():
            input_var = inputs.cuda()  # Variable() is deprecated; tensors work directly
            if args.flip:
                flip_inputs = inputs.clone()
                for j, finp in enumerate(flip_inputs):  # 'j' avoids shadowing the batch index 'i'
                    finp = im_to_numpy(finp)
                    finp = cv2.flip(finp, 1)
                    flip_inputs[j] = im_to_torch(finp)
                flip_input_var = flip_inputs.cuda()

            # compute output
            global_outputs, refine_output = model(input_var)
            score_map = refine_output.data.cpu()
            score_map = score_map.numpy()

            if args.flip:
                flip_global_outputs, flip_output = model(flip_input_var)
                flip_score_map = flip_output.data.cpu()
                flip_score_map = flip_score_map.numpy()

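                # Test-time flip augmentation: mirror the flipped network output
                # back, swap left/right symmetric joints (cfg.symmetry), then
                # average the two heatmaps so the final score map is flip-consistent.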
                for j, fscore in enumerate(flip_score_map):  # 'j' avoids shadowing the batch index 'i'
                    fscore = fscore.transpose((1, 2, 0))
                    fscore = cv2.flip(fscore, 1)
                    fscore = list(fscore.transpose((2, 0, 1)))
                    for (q, w) in cfg.symmetry:
                        fscore[q], fscore[w] = fscore[w], fscore[q]
                    fscore = np.array(fscore)
                    score_map[j] += fscore
                    score_map[j] /= 2

            ids = meta['imgID'].numpy()
            det_scores = meta['det_scores']
            for b in range(inputs.size(0)):
                details = meta['augmentation_details']
                single_result_dict = {}
                single_result = []

                single_map = score_map[b]
                r0 = single_map.copy()
                r0 /= 255
                r0 += 0.5
                v_score = np.zeros(17)
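                # Decode each heatmap: pad the borders, smooth with a Gaussian,
                # take the argmax as the joint, then shift it a quarter pixel
                # toward the second-highest peak (a standard sub-pixel refinement)
                # before mapping back to original-image coordinates via the
                # augmentation details (crop box) stored in the metadata.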
                for p in range(17):
                    single_map[p] /= np.amax(single_map[p])
                    border = 10
                    dr = np.zeros((cfg.output_shape[0] + 2 * border,
                                   cfg.output_shape[1] + 2 * border))
                    dr[border:-border, border:-border] = single_map[p].copy()
                    dr = cv2.GaussianBlur(dr, (21, 21), 0)
                    lb = dr.argmax()
                    y, x = np.unravel_index(lb, dr.shape)
                    dr[y, x] = 0
                    lb = dr.argmax()
                    py, px = np.unravel_index(lb, dr.shape)
                    y -= border
                    x -= border
                    py -= border + y
                    px -= border + x
                    ln = (px**2 + py**2)**0.5
                    delta = 0.25
                    if ln > 1e-3:
                        x += delta * px / ln
                        y += delta * py / ln
                    x = max(0, min(x, cfg.output_shape[1] - 1))
                    y = max(0, min(y, cfg.output_shape[0] - 1))
                    resy = float((4 * y + 2) / cfg.data_shape[0] *
                                 (details[b][3] - details[b][1]) +
                                 details[b][1])
                    resx = float((4 * x + 2) / cfg.data_shape[1] *
                                 (details[b][2] - details[b][0]) +
                                 details[b][0])
                    v_score[p] = float(r0[p,
                                          int(round(y) + 1e-10),
                                          int(round(x) + 1e-10)])
                    single_result.append(resx)
                    single_result.append(resy)
                    single_result.append(1)
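                # COCO keypoint results are flat [x1, y1, v1, ..., x17, y17, v17]
                # triplets; the instance score below is the detector score scaled
                # by the mean heatmap confidence of the predicted joints.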
                if len(single_result) != 0:
                    single_result_dict['image_id'] = int(ids[b])
                    single_result_dict['category_id'] = 1
                    single_result_dict['keypoints'] = single_result
                    single_result_dict['score'] = float(
                        det_scores[b]) * v_score.mean()
                    full_result.append(single_result_dict)

    result_path = args.result
    if not isdir(result_path):
        mkdir_p(result_path)
    result_file = os.path.join(result_path, 'result.json')
    with open(result_file, 'w') as wf:
        json.dump(full_result, wf)

    # evaluate on COCO
    eval_gt = COCO(cfg.ori_gt_path)
    eval_dt = eval_gt.loadRes(result_file)
    cocoEval = COCOeval(eval_gt, eval_dt, iouType='keypoints')
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
Exemplo n.º 21
0
def main(args):
    # import pdb; pdb.set_trace()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # device = torch.device("cpu")
    print(device)

    writer = SummaryWriter(cfg.tensorboard_path)
    # create checkpoint dir
    counter = 0
    if not isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # create model
    model = network.__dict__[cfg.model](cfg.output_shape,
                                        cfg.num_class,
                                        pretrained=True)

    model = torch.nn.DataParallel(model).to(device)
    # model = model.to(device)

    # define loss function (criterion) and optimizer
    criterion_bce = torch.nn.BCELoss().to(device)
    criterion_abs = torch.nn.L1Loss().to(device)
    # criterion_abs = offset_loss().to(device)
    # criterion1 = torch.nn.MSELoss().to(device) # for Global loss
    # criterion2 = torch.nn.MSELoss(reduce=False).to(device) # for refine loss
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=cfg.lr,
                                 weight_decay=cfg.weight_decay)

    if args.resume:
        print(args.resume)
        checkpoint_file_resume = os.path.join(args.checkpoint,
                                              args.resume + '.pth.tar')
        if isfile(checkpoint_file_resume):
            print("=> loading checkpoint '{}'".format(checkpoint_file_resume))
            checkpoint = torch.load(checkpoint_file_resume)
            pretrained_dict = checkpoint['state_dict']
            model.load_state_dict(pretrained_dict)
            args.start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_file_resume, checkpoint['epoch']))
            logger = Logger(join(args.checkpoint, 'log.txt'), resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(
                checkpoint_file_resume))
            # Fall back to a fresh logger so 'logger' is defined either way
            logger = Logger(join(args.checkpoint, 'log.txt'))
            logger.set_names(['Epoch', 'LR', 'Train Loss'])
    else:
        logger = Logger(join(args.checkpoint, 'log.txt'))
        logger.set_names(['Epoch', 'LR', 'Train Loss'])

    cudnn.benchmark = True
    # numel() count x 4 bytes (float32 parameters), reported in MB
    print('    Total params: %.2fMB' %
          (sum(p.numel() for p in model.parameters()) / (1024 * 1024) * 4))

    train_loader = torch.utils.data.DataLoader(MscocoMulti_double_only(cfg),
                                               batch_size=cfg.batch_size *
                                               args.num_gpus,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, cfg.lr_dec_epoch,
                                  cfg.lr_gamma)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))

        # train for one epoch
        train_loss, counter = train(train_loader, model,
                                    [criterion_abs, criterion_bce], writer,
                                    counter, optimizer, device)
        print('train_loss: ', train_loss)

        # append logger file
        logger.append([epoch + 1, lr, train_loss])

        save_model(
            {
                'epoch': epoch + 1,
                'info': cfg.info,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            },
            checkpoint=args.checkpoint)

    writer.export_scalars_to_json("./test.json")
    writer.close()

    logger.close()
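# The adjust_learning_rate helper used by the training loops above is not shown.
# A minimal sketch of a typical step-decay version (an assumption, not the
# original): multiply cfg.lr by lr_gamma once per decay epoch already passed.
def adjust_learning_rate(optimizer, epoch, lr_dec_epoch, lr_gamma):
    lr = cfg.lr * (lr_gamma ** sum(1 for e in lr_dec_epoch if epoch >= e))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr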