def test(test_cfg,
         model_cfg,
         dataset_cfg,
         checkpoint,
         batch_size,
         work_dir,
         gpus=1,
         workers=4):
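    """Evaluate a pose estimation model on a test dataset.

    Builds the dataset and model from their configs, loads the given
    checkpoint, runs (optionally flip-augmented) inference over the whole
    dataset and returns the performance indicator computed by
    ``dataset.evaluate``.
    """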

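    # standard ImageNet normalization statistics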
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    dataset = call_obj(**dataset_cfg,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           normalize,
                       ]))

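    # batch size and worker count scale with the number of GPUs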
    data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size * gpus,
                                              shuffle=False,
                                              num_workers=workers * gpus)

    # build the model (a list of configs is chained into an nn.Sequential)
    if isinstance(model_cfg, list):
        model = [call_obj(**c) for c in model_cfg]
        model = torch.nn.Sequential(*model)
    else:
        model = call_obj(**model_cfg)

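    # load the checkpoint on CPU, then wrap the model for (multi-)GPU inference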
    load_checkpoint(model, checkpoint, map_location='cpu')
    model = MMDataParallel(model, device_ids=range(gpus)).cuda()
    model.eval()
    # prepare for evaluation
    num_samples = len(dataset)
    prog_bar = ProgressBar(len(data_loader))
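    # (x, y, score) for every joint of every sample, plus per-sample box info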
    all_preds = np.zeros((num_samples, model_cfg.skeleton_head.num_joints, 3),
                         dtype=np.float32)

    all_boxes = np.zeros((num_samples, 6))
    filenames = []
    imgnums = []
    image_path = []
    idx = 0

    # evaluation loop adapted from the official HRNet code
    with torch.no_grad():
        for input, meta, target, target_weight in data_loader:
            # get prediction
            outputs = model(input, return_loss=False)
            if isinstance(outputs, list):
                output = outputs[-1]
            else:
                output = outputs
            # flip test: also run the horizontally flipped input and average the heatmaps
            if test_cfg.flip:
                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                input_flipped = torch.from_numpy(input_flipped).cuda()
                outputs_flipped = model(input_flipped, return_loss=False)
                if isinstance(outputs_flipped, list):
                    output_flipped = outputs_flipped[-1]
                else:
                    output_flipped = outputs_flipped
                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()
                # feature is not aligned, shift flipped heatmap for higher accuracy
                if test_cfg.shift_heatmap:
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]
                output = (output + output_flipped) * 0.5

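            # person-box center/scale and detector confidence from the dataset meta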
            c = meta['center'].numpy()
            s = meta['scale'].numpy()
            score = meta['score'].numpy()

            num_images = input.size(0)
            preds, maxvals = get_final_preds(test_cfg.post_process,
                                             output.detach().cpu().numpy(), c,
                                             s)

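            # write this batch's keypoints and confidences into the preallocated arrays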
            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = maxvals
            # TODO: double-check the all_boxes layout (center, scale, area, detector score)
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
            all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1)
            all_boxes[idx:idx + num_images, 5] = score
            image_path.extend(meta['image'])

            idx += num_images
            prog_bar.update()

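        # run the dataset-specific evaluation on the accumulated predictions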
        name_values, perf_indicator = dataset.evaluate(test_cfg, all_preds,
                                                       work_dir, all_boxes,
                                                       image_path, filenames,
                                                       imgnums)
    return perf_indicator