Example #1
def prepare_input(
    rgb: torch.Tensor,
    resize_res: int = 256,
    inp_res: int = 224,
    mean: torch.Tensor = 0.5 * torch.ones(3),
    std: torch.Tensor = 1.0 * torch.ones(3),
):
    """
    Process the video:
    1) Resize to [resize_res x resize_res]
    2) Center crop with [inp_res x inp_res]
    3) Color normalize using mean/std
    """
    iC, iF, iH, iW = rgb.shape
    rgb_resized = np.zeros((iF, resize_res, resize_res, iC))
    for t in range(iF):
        tmp = rgb[:, t, :, :]
        tmp = resize_generic(
            im_to_numpy(tmp), resize_res, resize_res, interp="bilinear", is_flow=False
        )
        rgb_resized[t] = tmp
    rgb = np.transpose(rgb_resized, (3, 0, 1, 2))
    # Center crop coords
    ulx = int((resize_res - inp_res) / 2)
    uly = int((resize_res - inp_res) / 2)
    # Center crop to inp_res x inp_res
    rgb = rgb[:, :, uly : uly + inp_res, ulx : ulx + inp_res]
    rgb = to_torch(rgb).float()
    assert rgb.max() <= 1
    rgb = color_normalize(rgb, mean, std)
    return rgb
def save_imgs(imgs, save_dir):
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    for j in range(0, imgs.size(0)):
        # print img_list[j].size()
        img_show = imutils.im_to_numpy(imgs[j]) * 255
        img_show = Image.fromarray(img_show.astype('uint8'), 'RGB')
        img_show.save(save_dir + '/%d.jpg' % j)
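The helpers used above (resize_generic, im_to_numpy, to_torch, color_normalize) are not part of this listing. A minimal torch-only sketch of the same resize / center-crop / normalize pipeline, assuming the input video is a float C x T x H x W tensor in [0, 1] (prepare_input_sketch is an illustrative name, not the original helper):

import torch
import torch.nn.functional as F

def prepare_input_sketch(rgb, resize_res=256, inp_res=224,
                         mean=0.5 * torch.ones(3), std=1.0 * torch.ones(3)):
    c, t, h, w = rgb.shape
    frames = rgb.permute(1, 0, 2, 3)  # T x C x H x W, treat frames as a batch
    # 1) Resize every frame to resize_res x resize_res
    frames = F.interpolate(frames, size=(resize_res, resize_res),
                           mode="bilinear", align_corners=False)
    # 2) Center crop to inp_res x inp_res
    ul = (resize_res - inp_res) // 2
    frames = frames[:, :, ul:ul + inp_res, ul:ul + inp_res]
    # 3) Per-channel color normalization
    frames = (frames - mean.view(1, c, 1, 1)) / std.view(1, c, 1, 1)
    return frames.permute(1, 0, 2, 3)  # back to C x T x inp_res x inp_res

# usage: clip = prepare_input_sketch(torch.rand(3, 16, 480, 640))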
Example #3
    def generateSampleFace(self, idx):
        sf = self.scale_factor
        rf = self.rot_factor

        main_pts = load_lua(
            os.path.join(self.img_folder, 'landmarks_t7',
                         self.anno[idx].split('_')[0],
                         self.anno[idx][:-4] + '.t7'))
        pts = main_pts[0] if self.pointType == '2D' else main_pts[1]

        #pts2 = main_pts[1]
        c = torch.Tensor((450 / 2, 450 / 2 + 50))
        s = 1.8

        img = load_image(
            os.path.join(self.img_folder, self.anno[idx].split('_')[0],
                         self.anno[idx][:-8] + '.jpg'))

        r = 0
        if self.is_train:
            s = s * torch.randn(1).mul_(sf).add_(1).clamp(1 - sf, 1 + sf)[0]
            r = torch.randn(1).mul_(rf).clamp(
                -2 * rf, 2 * rf)[0] if random.random() <= 0.6 else 0

            if random.random() <= 0.5:
                img = torch.from_numpy(fliplr(img.numpy())).float()
                pts = shufflelr(pts, width=img.size(2), dataset='w300lp')
                c[0] = img.size(2) - c[0]

            img[0, :, :].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1)
            img[1, :, :].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1)
            img[2, :, :].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1)

        # Prepare image and groundtruth map
        inp = HumanAug.crop(imutils.im_to_numpy(img), c.numpy(), s, r, 256,
                            200)

        inp = imutils.im_to_torch(inp).float()

        pts_input_res = HumanAug.TransformPts(pts.numpy(), c.numpy(), s, r,
                                              256, 200)
        pts_aug = pts_input_res * (1. * 64 / 256)

        # Generate ground truth
        heatmap, pts_aug = HumanPts.pts2heatmap(pts_aug, [64, 64], sigma=1)
        heatmap = torch.from_numpy(heatmap).float()

        # inp = crop(img, c, s, [256, 256], rot=r)
        # # inp = color_normalize(inp, self.mean, self.std)

        # tpts = pts.clone()
        # out = torch.zeros(self.nParts, 64, 64)
        # for i in range(self.nParts):
        #     if tpts[i, 0] > 0:
        #         tpts[i, 0:2] = to_torch(transform(tpts[i, 0:2] + 1, c, s, [64, 64], rot=r))
        #         out[i] = draw_labelmap(out[i], tpts[i] - 1, sigma=1)

        return inp, heatmap, pts, c, s, pts_input_res
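A compact, self-contained sketch of the scale/rotation sampling used above, assuming the same clamped-Gaussian convention (scale around 1 clamped to ±sf, rotation in degrees clamped to ±2·rf and applied with probability 0.6); the names are illustrative:

import random
import torch

def sample_aug_params(sf=0.25, rf=30.0, base_scale=1.8):
    # Scale: Gaussian around 1 with std sf, clamped to [1 - sf, 1 + sf]
    s = base_scale * torch.randn(1).mul_(sf).add_(1).clamp(1 - sf, 1 + sf).item()
    # Rotation: Gaussian with std rf, clamped to [-2*rf, 2*rf]; zero 40% of the time
    r = torch.randn(1).mul_(rf).clamp(-2 * rf, 2 * rf).item() \
        if random.random() <= 0.6 else 0.0
    return s, r

# s, r = sample_aug_params()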
Example #4
    def __getitem__(self, index):
        # print('loading image', index)
        if self.is_train:
            a = self.anno[self.train[index]]
        else:
            a = self.anno[self.valid[index]]

        img_path = os.path.join(self.img_folder, a['img_paths'])
        pts = torch.Tensor(a['joint_self'])
        # pts[:, 0:2] -= 1  # Convert pts to zero based
        pts = pts[:, 0:2]

        # c = torch.Tensor(a['objpos']) - 1
        c = torch.Tensor(a['objpos'])
        # print(c)
        s = torch.Tensor([a['scale_provided']])
        # exit()
        if a['dataset'] == 'MPII':
            c[1] = c[1] + 15 * s[0]
            s = s * 1.25
            normalizer = a['normalizer'] * 0.6
        elif a['dataset'] == 'LEEDS':
            print('using lsp data')
            s = s * 1.4375
            normalizer = torch.dist(pts[2, :], pts[13, :])
        else:
            print('no such dataset {}'.format(a['dataset']))

        # For single-person pose estimation with a centered/scaled figure
        img = imutils.load_image(img_path)
        # img = Image.open(img_path)

        inp = HumanAug.crop(imutils.im_to_numpy(img), c.numpy(), s.numpy(), 0,
                            self.inp_res, self.std_size)
        inp = imutils.im_to_torch(inp).float()
        # inp = self.color_normalize(inp, self.mean, self.std)
        # pts_aug = HumanAug.TransformPts(pts.numpy(), c.numpy(),
        #                                 s.numpy(), 0, self.out_res, self.std_size)
        #
        # # Generate ground truth
        # heatmap, pts_aug = HumanPts.pts2heatmap(pts_aug, [self.out_res, self.out_res], sigma=1)
        # heatmap = torch.from_numpy(heatmap).float()
        tmp_scale_distri = self.grnd_scale_distri[
            index] / self.grnd_scale_distri[index].sum()
        tmp_rot_distri = self.grnd_rotation_distri[
            index] / self.grnd_rotation_distri[index].sum()
        return inp, tmp_scale_distri, tmp_rot_distri, index
Example #5
    def gen_img_heatmap(self, c, s, r, img, pts):
        # Prepare image and groundtruth map
        # print s[0]/s0[0], r
        inp = HumanAug.crop(imutils.im_to_numpy(img), c.numpy(),
                            s.numpy(), r, self.inp_res, self.std_size)
        inp = imutils.im_to_torch(inp).float()
        # inp = self.color_normalize(inp, self.mean, self.std)
        pts_aug = HumanAug.TransformPts(pts.numpy(), c.numpy(),
                                        s.numpy(), r, self.out_res, self.std_size)

        idx_indicator = (pts[:, 0] <= 0) | (pts[:, 1] <= 0)
        idx = torch.arange(0, pts.size(0)).long()
        idx = idx[idx_indicator]
        pts_aug[idx, :] = 0
        # Generate ground truth
        heatmap, pts_aug = HumanPts.pts2heatmap(pts_aug, [self.out_res, self.out_res], sigma=1)
        heatmap = torch.from_numpy(heatmap).float()
        # pts_aug = torch.from_numpy(pts_aug).float()

        return inp, heatmap
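HumanPts.pts2heatmap is not shown in this listing. A minimal sketch of such a Gaussian heatmap renderer, assuming pts is an N x 2 array already in heatmap coordinates and that points at or below zero are treated as missing (illustrative, not the project's implementation):

import numpy as np

def pts2heatmap_sketch(pts, out_shape, sigma=1.0):
    h, w = out_shape
    heatmap = np.zeros((len(pts), h, w), dtype=np.float32)
    yy, xx = np.mgrid[0:h, 0:w]
    for i, (x, y) in enumerate(pts):
        if x <= 0 and y <= 0:  # masked / missing point -> all-zero channel
            continue
        heatmap[i] = np.exp(-((xx - x) ** 2 + (yy - y) ** 2) / (2 * sigma ** 2))
    return heatmap, pts

# heatmap, _ = pts2heatmap_sketch(np.array([[32.0, 20.0], [0.0, 0.0]]), [64, 64])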
Example #6
def main(args):
    # create model
    model = network.__dict__[cfg.model](cfg.output_shape,
                                        cfg.num_class,
                                        pretrained=False)
    model = torch.nn.DataParallel(model).cuda()

    test_loader = torch.utils.data.DataLoader(MscocoMulti(cfg, train=False),
                                              batch_size=args.batch *
                                              args.num_gpus,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True)

    # load training weights
    checkpoint_file = os.path.join(args.checkpoint, args.test + '.pth.tar')
    checkpoint = torch.load(checkpoint_file)
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(
        checkpoint_file, checkpoint['epoch']))

    # change to evaluation mode
    model.eval()

    print('testing...')
    full_result = []
    for i, (inputs, meta) in tqdm(enumerate(test_loader)):
        with torch.no_grad():
            input_var = torch.autograd.Variable(inputs.cuda())
            if args.flip == True:
                flip_inputs = inputs.clone()
                for i, finp in enumerate(flip_inputs):
                    finp = im_to_numpy(finp)
                    finp = cv2.flip(finp, 1)
                    flip_inputs[i] = im_to_torch(finp)
                flip_input_var = torch.autograd.Variable(flip_inputs.cuda())

            # compute output
            global_outputs, refine_output = model(input_var)
            score_map = refine_output.data.cpu()
            score_map = score_map.numpy()

            if args.flip == True:
                flip_global_outputs, flip_output = model(flip_input_var)
                flip_score_map = flip_output.data.cpu()
                flip_score_map = flip_score_map.numpy()

                for i, fscore in enumerate(flip_score_map):
                    fscore = fscore.transpose((1, 2, 0))
                    fscore = cv2.flip(fscore, 1)
                    fscore = list(fscore.transpose((2, 0, 1)))
                    for (q, w) in cfg.symmetry:
                        fscore[q], fscore[w] = fscore[w], fscore[q]
                    fscore = np.array(fscore)
                    score_map[i] += fscore
                    score_map[i] /= 2

            ids = meta['imgID'].numpy()
            det_scores = meta['det_scores']
            for b in range(inputs.size(0)):
                details = meta['augmentation_details']
                single_result_dict = {}
                single_result = []

                single_map = score_map[b]
                r0 = single_map.copy()
                r0 /= 255
                r0 += 0.5
                v_score = np.zeros(17)
                for p in range(17):
                    single_map[p] /= np.amax(single_map[p])
                    border = 10
                    dr = np.zeros((cfg.output_shape[0] + 2 * border,
                                   cfg.output_shape[1] + 2 * border))
                    dr[border:-border, border:-border] = single_map[p].copy()
                    dr = cv2.GaussianBlur(dr, (21, 21), 0)
                    lb = dr.argmax()
                    y, x = np.unravel_index(lb, dr.shape)
                    dr[y, x] = 0
                    lb = dr.argmax()
                    py, px = np.unravel_index(lb, dr.shape)
                    y -= border
                    x -= border
                    py -= border + y
                    px -= border + x
                    ln = (px**2 + py**2)**0.5
                    delta = 0.25
                    if ln > 1e-3:
                        x += delta * px / ln
                        y += delta * py / ln
                    x = max(0, min(x, cfg.output_shape[1] - 1))
                    y = max(0, min(y, cfg.output_shape[0] - 1))
                    resy = float((4 * y + 2) / cfg.data_shape[0] *
                                 (details[b][3] - details[b][1]) +
                                 details[b][1])
                    resx = float((4 * x + 2) / cfg.data_shape[1] *
                                 (details[b][2] - details[b][0]) +
                                 details[b][0])
                    v_score[p] = float(r0[p,
                                          int(round(y) + 1e-10),
                                          int(round(x) + 1e-10)])
                    single_result.append(resx)
                    single_result.append(resy)
                    single_result.append(1)
                if len(single_result) != 0:
                    single_result_dict['image_id'] = int(ids[b])
                    single_result_dict['category_id'] = 1
                    single_result_dict['keypoints'] = single_result
                    single_result_dict['score'] = float(
                        det_scores[b]) * v_score.mean()
                    full_result.append(single_result_dict)

    result_path = args.result
    if not isdir(result_path):
        mkdir_p(result_path)
    result_file = os.path.join(result_path, 'result.json')
    with open(result_file, 'w') as wf:
        json.dump(full_result, wf)

    # evaluate on COCO
    eval_gt = COCO(cfg.ori_gt_path)
    eval_dt = eval_gt.loadRes(result_file)
    cocoEval = COCOeval(eval_gt, eval_dt, iouType='keypoints')
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
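The per-keypoint loop above refines each heatmap argmax by shifting it a quarter pixel toward the second-strongest peak. A self-contained sketch of that refinement on one heatmap channel, using the same border padding and Gaussian blur (refine_peak is an illustrative name):

import cv2
import numpy as np

def refine_peak(heat, border=10, delta=0.25):
    # Returns (x, y) of the strongest peak, nudged toward the secondary peak.
    h, w = heat.shape
    dr = np.zeros((h + 2 * border, w + 2 * border))
    dr[border:-border, border:-border] = heat / np.amax(heat)
    dr = cv2.GaussianBlur(dr, (21, 21), 0)
    y, x = np.unravel_index(dr.argmax(), dr.shape)    # primary peak
    dr[y, x] = 0
    py, px = np.unravel_index(dr.argmax(), dr.shape)  # secondary peak
    y, x = y - border, x - border
    dy, dx = py - border - y, px - border - x
    ln = (dx ** 2 + dy ** 2) ** 0.5
    if ln > 1e-3:  # shift a quarter pixel toward the secondary peak
        x += delta * dx / ln
        y += delta * dy / ln
    return float(np.clip(x, 0, w - 1)), float(np.clip(y, 0, h - 1))

# x, y = refine_peak(np.random.rand(64, 48))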
def crop(img, center, scale, res, rot=0):
    img = im_to_numpy(img)

    # Preprocessing for efficient cropping
    ht, wd = img.shape[0], img.shape[1]
    sf = scale * 200.0 / res[0]

    if sf < 2:  # if the res/img scale ratio is too small, proceed without resizing
        sf = 1
    else:  # resize so the person is an appropriate size when mapped to the res scale
        new_size = int(np.math.floor(max(ht, wd) / sf))
        new_ht = int(np.math.floor(ht / sf))
        new_wd = int(np.math.floor(wd / sf))
        if new_size < 2:
            print('cannot crop')
            return torch.zeros(res[0], res[1], img.shape[2]) if len(
                img.shape) > 2 else torch.zeros(res[0], res[1])
        else:  # compute center and scale in the res-scale coordinate system
            img = cv2.resize(img,
                             dsize=(new_wd, new_ht),
                             interpolation=cv2.INTER_LINEAR
                             )  # cv2 img format: width x height
            center = center * 1.0 / sf
            scale = scale / sf  # 1.28

    # Upper left point
    ul = np.array(affine_transform([0, 0], center, scale, res, invert=1))
    # invert=1 because we want to know where the new image's [0, 0] coordinate lies in the old image.
    # Bottom right point
    br = np.array(affine_transform(res, center, scale, res, invert=1))

    pad = int(np.linalg.norm(br - ul) / 2 - float(br[1] - ul[1]) / 2)
    if rot != 0:
        ul -= pad
        br += pad

    # crop area
    new_shape = [br[1] - ul[1], br[0] - ul[0]]
    if len(img.shape) > 2:
        new_shape += [img.shape[2]]
    new_img = np.zeros(new_shape)  # HWC

    # Range to fill new array (boundary zero area)
    new_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0]
    new_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1]
    # Range to sample from original image
    old_x = max(0, ul[0]), min(img.shape[1], br[0])
    old_y = max(0, ul[1]), min(img.shape[0], br[1])
    new_img[new_y[0]:new_y[1], new_x[0]:new_x[1]] = img[old_y[0]:old_y[1],
                                                        old_x[0]:old_x[1]]

    if rot != 0:
        # Remove padding
        rmat = cv2.getRotationMatrix2D((new_shape[1] / 2, new_shape[0] / 2),
                                       rot, 1)
        new_img = cv2.warpAffine(new_img, rmat, (new_shape[1], new_shape[0]))
        new_img = new_img[pad:-pad, pad:-pad]

    new_img = im_to_torch(
        cv2.resize(new_img,
                   dsize=(res[1], res[0]),
                   interpolation=cv2.INTER_LINEAR))
    return new_img
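affine_transform is not included in this listing. Under the 200-pixel reference-box convention used above (the crop box in the original image has side scale * 200 centered at `center`; rotation is handled separately after the crop), the inverse mapping it computes for a square output resolution reduces to roughly the following (a sketch under that assumption, not the project's implementation):

import numpy as np

def output_to_image(pt, center, scale, out_res):
    # Map (x, y) in the out_res x out_res crop back to original-image coordinates.
    side = scale * 200.0
    x = center[0] - side / 2 + pt[0] * side / out_res
    y = center[1] - side / 2 + pt[1] * side / out_res
    return np.array([x, y])

# ul = output_to_image([0, 0], center=(320, 240), scale=1.28, out_res=256)
# br = output_to_image([256, 256], center=(320, 240), scale=1.28, out_res=256)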
Example #8
File: train.py    Project: AmonLiu/MLPE
def test(test_loader, model):
    model.eval()

    print('testing...')
    full_result = []
    flip = True
    for i, (inputs, meta) in tqdm(enumerate(test_loader)):
        with torch.no_grad():
            input_var = torch.autograd.Variable(inputs.cuda())
            if flip == True:
                flip_inputs = inputs.clone()
                for i, finp in enumerate(flip_inputs):
                    finp = im_to_numpy(finp)
                    finp = cv2.flip(finp, 1)
                    flip_inputs[i] = im_to_torch(finp)
                flip_input_var = torch.autograd.Variable(flip_inputs.cuda())

            # compute output
            global_outputs, refine_output = model(input_var)
            score_map = refine_output.data.cpu()
            score_map = score_map.numpy()

            if flip == True:
                flip_global_outputs, flip_output = model(flip_input_var)
                flip_score_map = flip_output.data.cpu()
                flip_score_map = flip_score_map.numpy()

                for i, fscore in enumerate(flip_score_map):
                    fscore = fscore.transpose((1, 2, 0))
                    fscore = cv2.flip(fscore, 1)
                    fscore = list(fscore.transpose((2, 0, 1)))
                    for (q, w) in test_cfg.symmetry:
                        fscore[q], fscore[w] = fscore[w], fscore[q]
                    fscore = np.array(fscore)
                    score_map[i] += fscore
                    score_map[i] /= 2

            ids = meta['imgID'].numpy()
            det_scores = meta['det_scores']
            for b in range(inputs.size(0)):
                details = meta['augmentation_details']
                single_result_dict = {}
                single_result = []

                single_map = score_map[b]
                r0 = single_map.copy()
                r0 /= 255
                r0 += 0.5
                v_score = np.zeros(17)
                for p in range(17):
                    single_map[p] /= np.amax(single_map[p])
                    border = 10
                    dr = np.zeros((test_cfg.output_shape[0] + 2 * border,
                                   test_cfg.output_shape[1] + 2 * border))
                    dr[border:-border, border:-border] = single_map[p].copy()
                    dr = cv2.GaussianBlur(dr, (21, 21), 0)
                    lb = dr.argmax()
                    y, x = np.unravel_index(lb, dr.shape)
                    dr[y, x] = 0
                    lb = dr.argmax()
                    py, px = np.unravel_index(lb, dr.shape)
                    y -= border
                    x -= border
                    py -= border + y
                    px -= border + x
                    ln = (px**2 + py**2)**0.5
                    delta = 0.25
                    if ln > 1e-3:
                        x += delta * px / ln
                        y += delta * py / ln
                    x = max(0, min(x, test_cfg.output_shape[1] - 1))
                    y = max(0, min(y, test_cfg.output_shape[0] - 1))
                    resy = float((4 * y + 2) / test_cfg.data_shape[0] *
                                 (details[b][3] - details[b][1]) +
                                 details[b][1])
                    resx = float((4 * x + 2) / test_cfg.data_shape[1] *
                                 (details[b][2] - details[b][0]) +
                                 details[b][0])
                    v_score[p] = float(r0[p,
                                          int(round(y) + 1e-10),
                                          int(round(x) + 1e-10)])
                    single_result.append(resx)
                    single_result.append(resy)
                    single_result.append(1)
                if len(single_result) != 0:
                    single_result_dict['image_id'] = int(ids[b])
                    single_result_dict['category_id'] = 1
                    single_result_dict['keypoints'] = single_result
                    single_result_dict['score'] = float(
                        det_scores[b]) * v_score.mean()
                    full_result.append(single_result_dict)

    result_path = 'result'
    if not isdir(result_path):
        mkdir_p(result_path)
    result_file = os.path.join(result_path, 'result.json')
    with open(result_file, 'w') as wf:
        json.dump(full_result, wf)

    # evaluate on COCO
    eval_gt = COCO(test_cfg.ori_gt_path)
    eval_dt = eval_gt.loadRes(result_file)
    cocoEval = COCOeval(eval_gt, eval_dt, iouType='keypoints')
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
    def __getitem__(self, index):

        if self.is_train:
            a = self.anno[self.train[index]]
        else:
            a = self.anno[self.valid[index]]

        img_path = os.path.join(self.img_folder, a['img_paths'])
        pts = torch.Tensor(a['joint_self'])
        # pts[:, 0:2] -= 1  # Convert pts to zero based
        pts = pts[:, 0:2]

        # c = torch.Tensor(a['objpos']) - 1
        c = torch.Tensor(a['objpos'])
        # print c
        s = torch.Tensor([a['scale_provided']])
        # print s
        # exit()
        if a['dataset'] == 'MPII':
            c[1] = c[1] + 15 * s[0]
            s = s * 1.25
            normalizer = a['normalizer'] * 0.6
        elif a['dataset'] == 'LEEDS':
            print('using lsp data')
            s = s * 1.4375
            normalizer = torch.dist(pts[2, :], pts[13, :])
        else:
            print('no such dataset {}'.format(a['dataset']))

        # For single-person pose estimation with a centered/scaled figure
        img = imutils.load_image(img_path)
        # print img.size()
        # exit()
        # img = scipy.misc.imread(img_path, mode='RGB') # CxHxW
        # img = torch.from_numpy(img)

        r = 0
        if self.is_train:
            s = s * (2**(sample_from_bounded_gaussian(self.scale_factor)))
            r = sample_from_bounded_gaussian(self.rot_factor)
            if np.random.uniform(0, 1, 1) <= 0.6:
                r = 0

            # Flip
            if np.random.random() <= 0.5:
                img = torch.from_numpy(HumanAug.fliplr(img.numpy())).float()
                pts = HumanAug.shufflelr(pts,
                                         width=img.size(2),
                                         dataset='mpii')
                c[0] = img.size(2) - c[0]

            # Color
            img[0, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
            img[1, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
            img[2, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)

        # Prepare image and groundtruth map
        inp = HumanAug.crop(imutils.im_to_numpy(img), c.numpy(), s.numpy(), r,
                            self.inp_res, self.std_size)
        inp = imutils.im_to_torch(inp).float()
        # inp = self.color_normalize(inp, self.mean, self.std)
        pts_aug = HumanAug.TransformPts(pts.numpy(), c.numpy(), s.numpy(), r,
                                        self.out_res, self.std_size)

        #idx_indicator = (pts[:, 0] <= 0) | (pts[:, 1] <= 0)
        #idx = torch.arange(0, pts.size(0)).long()
        #idx = idx[idx_indicator]
        #pts_aug[idx, :] = 0
        # Generate ground truth
        heatmap, pts_aug = HumanPts.pts2heatmap(pts_aug,
                                                [self.out_res, self.out_res],
                                                sigma=1)
        heatmap = torch.from_numpy(heatmap).float()
        # pts_aug = torch.from_numpy(pts_aug).float()

        r = torch.FloatTensor([r])
        #normalizer = torch.FloatTensor([normalizer])
        if self.is_train:
            #print 'inp size: ', inp.size()
            #print 'heatmap size: ', heatmap.size()
            #print 'c size: ', c.size()
            #print 's size: ', s.size()
            #print 'r size: ', r.size()
            #print 'pts size: ', pts.size()
            #print 'normalizer size: ', normalizer.size()
            #print 'r: ', r
            #    if len(r.size()) != 1:
            #	print 'r: ', r
            #    if len(c.size()) != 1:
            #	print 'c: ', c
            return inp, heatmap, c, s, r, pts, normalizer
        else:
            # Meta info
            #meta = {'index': index, 'center': c, 'scale': s,
            #        'pts': pts, 'tpts': pts_aug}

            return inp, heatmap, c, s, r, pts, normalizer, index
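sample_from_bounded_gaussian is referenced in several of these snippets but not defined here. A plausible minimal stand-in, assuming it draws a zero-mean Gaussian sample with standard deviation `factor` and clips it to a bounded range (a guess based on how the value is used; the repository's helper may differ):

import numpy as np

def sample_from_bounded_gaussian(factor):
    # Zero-mean Gaussian with std = factor, clipped to [-2*factor, 2*factor]
    # (assumed behaviour, labelled hypothetical).
    return float(np.clip(np.random.randn() * factor, -2 * factor, 2 * factor))

# e.g. scale jitter as used above: s = s * (2 ** sample_from_bounded_gaussian(0.25))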
Example #10
    def __getitem__(self, index):

        if self.is_train:
            a = self.anno[self.train[index]]
        else:
            a = self.anno[self.valid[index]]

        img_path = os.path.join(self.img_folder, a['img_paths'])
        pts_path = os.path.join(self.img_folder, a['pts_paths'])

        skip_pts = [33, 36, 39, 42, 45, 48, 51, 54, 57]

        if pts_path[-4:] == '.txt':
            pts = np.loadtxt(pts_path)  # L x 2
            # pts = pts[skip_pts, :]
        elif pts_path[-4:] == '.pts':
            pts = FacePts.Pts2Lmk(pts_path)  # L x 2
            # pts = pts[skip_pts, :]

        # print(pts)

        pts = torch.Tensor(pts)
        assert torch.sum(pts - torch.Tensor(a['pts'])) == 0
        s = torch.Tensor([a['scale_provided_det']]) * 1.1
        c = torch.Tensor(a['objpos_det'])
        # For single-person pose estimation with a centered/scaled figure
        img = imutils.load_image(img_path)
        # print img.size()
        # exit()
        # img = scipy.misc.imread(img_path, mode='RGB') # CxHxW
        # img = torch.from_numpy(img)

        r = 0
        if self.is_train:
            s = s * (2**(sample_from_bounded_gaussian(self.scale_factor)))
            r = sample_from_bounded_gaussian(self.rot_factor)
            if np.random.uniform(0, 1, 1) <= 0.6:
                r = np.array([0])

            # Flip
            #if np.random.random() <= 0.5:
            #    img = torch.from_numpy(HumanAug.fliplr(img.numpy())).float()
            #    pts = HumanAug.shufflelr(pts, width=img.size(2), dataset='face')
            #    c[0] = img.size(2) - c[0]

            # Color
            img[0, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
            img[1, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
            img[2, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)

        # Prepare image and groundtruth map
        inp = HumanAug.crop(imutils.im_to_numpy(img), c.numpy(), s.numpy(), r,
                            self.inp_res, self.std_size)
        inp = imutils.im_to_torch(inp).float()
        # inp = self.color_normalize(inp, self.mean, self.std)
        # pts_aug = HumanAug.TransformPts(pts.numpy(), c.numpy(),
        #                                 s.numpy(), r, self.out_res, self.std_size)
        pts_input_res = HumanAug.TransformPts(pts.numpy(), c.numpy(),
                                              s.numpy(), r, self.inp_res,
                                              self.std_size)
        pts_aug = pts_input_res * (1. * self.out_res / self.inp_res)

        #check_res = pts_input_res - pts

        #print('diff.... -> {}'.format(check_res))

        # Generate ground truth
        heatmap, pts_aug = HumanPts.pts2heatmap(pts_aug,
                                                [self.out_res, self.out_res],
                                                sigma=1)
        heatmap = torch.from_numpy(heatmap).float()
        # pts_aug = torch.from_numpy(pts_aug).float()

        if self.is_train:
            return inp, heatmap, pts_input_res
        else:
            # Meta info
            #meta = {'index': index, 'center': c, 'scale': s,
            #        'pts': pts, 'tpts': pts_aug}

            return inp, heatmap, pts, index, c, s, img_path
Example #11
    def _get_single_video(self, index, data_index, frame_ix):
        """Loads/augments/returns the video data
        :param index: Index wrt to the data loader
        :param data_index: Index wrt to train/valid list
        :param frame_ix: A list of frame indices to sample from the video
        :return data: Dictionary of input/output and other metadata
        """
        # If the input is pose (Pose->Sign experiments)
        if hasattr(self, "input_type") and self.input_type == "pose":
            data = {
                "rgb": self._get_pose(data_index, frame_ix),
                "index": index,
                "data_index": data_index,
                "class": self._get_class(data_index, frame_ix),
                "class_names": self.class_names,
                "dataset": self.datasetname,
            }
            return data
        # Otherwise the input is RGB
        else:
            rgb = self._load_rgb(data_index, frame_ix)
            if getattr(self, "mask_rgb", False):
                rgb = self._mask_rgb(
                    rgb,
                    data_index,
                    frame_ix,
                    region=self.mask_rgb,
                    mask_type=self.mask_type,
                )

        if getattr(self, "gpu_collation", False):
            # Meta info
            data = {
                "rgb": rgb,
                "index": index,
                "data_index": data_index,
                "class": self._get_class(data_index, frame_ix),
                "class_names": self.class_names,
                "dataset": self.datasetname,
            }
            return data

        # Preparing RGB data
        if self.setname == "train":
            # Horizontal flip: disable for now, should be done after the bbox cropping
            is_hflip = random.random() < self.hflip
            if is_hflip:
                rgb = torch.flip(rgb, dims=[2])
            # Color jitter
            rgb = im_color_jitter(rgb, num_in_frames=self.num_in_frames, thr=0.2)

        rgb = im_to_numpy(rgb)
        iH, iW, iC = rgb.shape

        if self.use_bbox:
            y0, x0, y1, x1 = self._get_bbox(data_index)
            y0 = max(0, int(y0 * iH))
            y1 = min(iH, int(y1 * iH))
            x0 = max(0, int(x0 * iW))
            x1 = min(iW, int(x1 * iW))
            if self.setname == "train" and is_hflip:
                x0 = iW - x0
                x1 = iW - x1
                x0, x1 = x1, x0
            rgb = rgb[y0:y1, x0:x1, :]
            rgb = resize_generic(
                rgb, self.resize_res, self.resize_res, interp="bilinear", is_flow=False,
            )
            iH, iW, iC = rgb.shape

        resol = self.resize_res  # 300 for 256, 130 for 112 etc.
        if self.setname == "train":
            # Augment the scaled resolution between:
            #     [1 - self.scale_factor, 1 + self.scale_factor)
            rand_scale = random.random()
            resol *= 1 - self.scale_factor + 2 * self.scale_factor * rand_scale
            resol = int(resol)
        if iW > iH:
            nH, nW = resol, int(resol * iW / iH)
        else:
            nH, nW = int(resol * iH / iW), resol
        # Resize to nH, nW resolution
        rgb = resize_generic(rgb, nH, nW, interp="bilinear", is_flow=False)

        # Crop
        if self.setname == "train":
            # Random crop coords
            ulx = random.randint(0, nW - self.inp_res)
            uly = random.randint(0, nH - self.inp_res)
        else:
            # Center crop coords
            ulx = int((nW - self.inp_res) / 2)
            uly = int((nH - self.inp_res) / 2)
        # Crop to inp_res x inp_res
        rgb = rgb[uly : uly + self.inp_res, ulx : ulx + self.inp_res]
        rgb = im_to_torch(rgb)
        rgb = im_to_video(rgb)
        rgb = color_normalize(rgb, self.mean, self.std)

        # Return
        data = {
            "rgb": rgb,
            "class": self._get_class(data_index, frame_ix),
            "index": index,
            "class_names": self.class_names,
            "dataset": self.datasetname,
        }

        return data
Example #12
def main(args):
    # create model
    model = network.__dict__[cfg.model](cfg.output_shape, cfg.num_class, pretrained=False)
    model = torch.nn.DataParallel(model).cuda()

    test_loader = torch.utils.data.DataLoader(
        MscocoMulti(cfg, train=False),
        batch_size=args.batch * args.num_gpus, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # load training weights
    # checkpoint_file = os.path.join(args.checkpoint, args.test + '.pth.tar')
    checkpoint_file = os.path.join('model', 'checkpoint', 'epoch9checkpoint.pth.tar')
    checkpoint = torch.load(checkpoint_file)
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_file, checkpoint['epoch']))

    # change to evaluation mode
    model.eval()

    print('testing...')
    full_result = []
    for i, (inputs, meta) in tqdm(enumerate(test_loader)):
        with torch.no_grad():
            input_var = torch.autograd.Variable(inputs.cuda())
            if args.flip == True:
                flip_inputs = inputs.clone()
                for i, finp in enumerate(flip_inputs):
                    finp = im_to_numpy(finp)
                    finp = cv2.flip(finp, 1)
                    flip_inputs[i] = im_to_torch(finp)
                flip_input_var = torch.autograd.Variable(flip_inputs.cuda())

            # compute output
            global_outputs, refine_output = model(input_var)
            score_map = refine_output.data.cpu()
            score_map = score_map.numpy()

            if args.flip == True:
                flip_global_outputs, flip_output = model(flip_input_var)
                flip_score_map = flip_output.data.cpu()
                flip_score_map = flip_score_map.numpy()

                for i, fscore in enumerate(flip_score_map):
                    fscore = fscore.transpose((1, 2, 0))
                    fscore = cv2.flip(fscore, 1)
                    fscore = list(fscore.transpose((2, 0, 1)))
                    for (q, w) in cfg.symmetry:
                        fscore[q], fscore[w] = fscore[w], fscore[q]
                    fscore = np.array(fscore)
                    score_map[i] += fscore
                    score_map[i] /= 2

            # ids = meta['imgID'].numpy()
            det_scores = meta['det_scores']
            for b in range(inputs.size(0)):
                details = meta['augmentation_details']
                imgid = meta['imgid'][b]
                # print(imgid)
                category = meta['category'][b]
                # print(category)
                single_result_dict = {}
                single_result = []

                single_map = score_map[b]
                r0 = single_map.copy()
                r0 /= 255
                r0 += 0.5
                v_score = np.zeros(24)
                for p in range(24):
                    single_map[p] /= np.amax(single_map[p])
                    border = 10
                    dr = np.zeros((cfg.output_shape[0] + 2 * border, cfg.output_shape[1] + 2 * border))
                    dr[border:-border, border:-border] = single_map[p].copy()
                    dr = cv2.GaussianBlur(dr, (21, 21), 0)
                    lb = dr.argmax()
                    y, x = np.unravel_index(lb, dr.shape)
                    dr[y, x] = 0
                    lb = dr.argmax()
                    py, px = np.unravel_index(lb, dr.shape)
                    y -= border
                    x -= border
                    py -= border + y
                    px -= border + x
                    ln = (px ** 2 + py ** 2) ** 0.5
                    delta = 0.25
                    if ln > 1e-3:
                        x += delta * px / ln
                        y += delta * py / ln
                    x = max(0, min(x, cfg.output_shape[1] - 1))
                    y = max(0, min(y, cfg.output_shape[0] - 1))
                    resy = float((4 * y + 2) / cfg.data_shape[0] * (details[b][3] - details[b][1]) + details[b][1])
                    resx = float((4 * x + 2) / cfg.data_shape[1] * (details[b][2] - details[b][0]) + details[b][0])
                    v_score[p] = float(r0[p, int(round(y) + 1e-10), int(round(x) + 1e-10)])
                    single_result.append(resx)
                    single_result.append(resy)
                    single_result.append(1)
                if len(single_result) != 0:
                    result = []
                    result.append(imgid)
                    result.append(category)
                    j = 0
                    while j < len(single_result):
                        result.append(str(int(single_result[j])) + '_' + str(int(single_result[j + 1])) + '_1')
                        j += 3
                    full_result.append(result)

    result_path = args.result
    if not isdir(result_path):
        mkdir_p(result_path)
    result_file = os.path.join(result_path, 'result.csv')
    with open(result_file, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerows(full_result)

    Evaluator = FaiKeypoint2018Evaluator(userAnswerFile=os.path.join(result_path, 'result9.csv'),
                                         standardAnswerFile="fashionAI_key_points_test_a_answer_20180426.csv")
    score = Evaluator.evaluate()

    print(score)

    Evaluator.writerror(result_path=os.path.join(result_path, "toperror1.csv"))
Example #13
def viz_predictions(
        rgb: torch.Tensor,
        word_topk: np.ndarray,
        prob_topk: np.ndarray,
        t_mid: np.ndarray,
        frame_dir: Path,
        confidence: float,
        gt_text: str,
):
    """
    Plot the top-k predicted words on top of the frames if they are
    over a confidence threshold
    """
    if gt_text != "":
        # Put linebreaks for long strings every 40 chars
        gt_text = list(gt_text)
        max_num_chars_per_line = 40
        num_linebreaks = int(len(gt_text) / max_num_chars_per_line)
        for lb in range(num_linebreaks):
            pos = (lb + 1) * max_num_chars_per_line
            gt_text.insert(pos, "\n")
        gt_text = "".join(gt_text)
        gt_text = f"GT: {gt_text}"
    print(f"Saving visualizations to {frame_dir}")
    num_frames = rgb.shape[1]
    height = rgb.shape[2]
    offset = height / 14
    vertical_sep = offset * 2
    for t in tqdm(range(num_frames)):
        t_ix = abs(t_mid - t).argmin()
        sign = word_topk[:, t_ix]
        sign_prob = prob_topk[:, t_ix]
        plt.imshow(im_to_numpy(rgb[:, t]))
        for k, s in enumerate(sign):
            if sign_prob[k] >= confidence:
                pred_text = f"Pred: {s} ({100 * sign_prob[k]:.0f}%)"
                plt.text(
                    offset,
                    offset + k * vertical_sep,
                    pred_text,
                    fontsize=12,
                    fontweight="bold",
                    color="white",
                    verticalalignment="top",
                    bbox=dict(facecolor="green", alpha=0.9),
                )
        if gt_text != "":
            # Hard-coded
            plt.text(
                offset,
                230,
                gt_text,
                fontsize=12,
                fontweight="bold",
                color="white",
                verticalalignment="top",
                bbox=dict(facecolor="blue", alpha=0.9),
            )
        plt.axis("off")
        plt.savefig(frame_dir / f"frame_{t:03d}.png")
        plt.clf()
Example #14
def main(args):
    # create model
    model = network.__dict__[cfg.model](cfg.output_shape,
                                        cfg.num_class,
                                        pretrained=False)
    model = torch.nn.DataParallel(model).cuda()

    test_loader = torch.utils.data.DataLoader(MscocoMulti(cfg, train=False),
                                              batch_size=args.batch *
                                              args.num_gpus,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True)

    # load training weights
    checkpoint_file = os.path.join(args.checkpoint, args.test + '.pth.tar')
    checkpoint = torch.load(checkpoint_file)
    model.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})".format(
        checkpoint_file, checkpoint['epoch']))

    # change to evaluation mode
    model.eval()

    print('testing...')
    full_result = []
    for i, (inputs, meta) in tqdm(enumerate(test_loader)):
        # print(i)
        # print(inputs.shape)
        with torch.no_grad():
            input_var = torch.autograd.Variable(inputs.cuda())
            if args.flip == True:
                flip_inputs = inputs.clone()

                #   k = 0
                for i, finp in enumerate(flip_inputs):
                    finp = im_to_numpy(finp)
                    finp = cv2.flip(finp, 1)
                    flip_inputs[i] = im_to_torch(finp)
            #  print(k)
            #  print(1111111111111111)
                flip_input_var = torch.autograd.Variable(flip_inputs.cuda())

            # compute output
            global_outputs, refine_output = model(input_var)
            score_map = refine_output.data.cpu()
            score_map = score_map.numpy()
            #     print(score_map.shape)
            # score_map (128,2,64,48)
            #      xx = inputs.numpy()
            #    print(xx[0].transpose((1,2,0)).shape)
            #      plt.figure(1)
            #     plt.subplot(121)
            #     plt.imshow(xx[0].transpose((1,2,0)))
            #
            #   plt.subplot(122)
            #   plt.imshow(score_map[0][0], cmap='gray', interpolation='nearest')
            #   plt.show()
            if args.flip == True:
                flip_global_outputs, flip_output = model(flip_input_var)
                flip_score_map = flip_output.data.cpu()
                flip_score_map = flip_score_map.numpy()

                for i, fscore in enumerate(flip_score_map):

                    fscore = fscore.transpose((1, 2, 0))
                    fscore = cv2.flip(fscore, 1)
                    #   fscore=fscore[:, :,np.newaxis]
                    #  print(fscore.shape)  # (64,48,2)
                    #  print(2222222222222)
                    fscore = list(fscore.transpose((2, 0, 1)))
                    #  for (q, w) in cfg.symmetry:
                    #     fscore[q], fscore[w] = fscore[w], fscore[q]
                    fscore = np.array(fscore)
                    score_map[i] += fscore
                    score_map[i] /= 2
                # print(score_map[i].shape)
                #  print(score_map.shape)   (128,2,64.48)

            ids = meta['imgID'].numpy()
            imgclass = meta['class']
            #  print(ids)
            det_scores = meta['det_scores']
            for b in range(inputs.size(0)):

                #  print(inputs.size(0))
                details = meta['augmentation_details']
                single_result_dict = {}
                single_result = []

                single_map = score_map[b]  #(2,64,48)
                #   print(single_map.shape)
                r0 = single_map.copy()
                r0 /= 255
                r0 += 0.5
                v_score = np.zeros(10)
                if imgclass[b] == 'chair':
                    c = 0
                elif imgclass[b] == 'bed':
                    c = 1
                elif imgclass[b] == 'sofa':
                    c = 2

                single_map[c] /= np.amax(single_map[c])
                border = 9
                ps = parseHeatmap(single_map[c], thresh=0.20)  #shape 2
                #        print(len(ps[0]))
                #      print(len(ps[1]))
                #     print(1111111111)
                #     plt.imshow(single_map[c], cmap='gray', interpolation='nearest')
                #     plt.show()
                #  print(len(ps[0]))
                for k in range(len(ps[0])):
                    x = ps[0][k] - border  # height
                    y = ps[1][k] - border  # width
                    #   print(cfg.data_shape[0]) # height
                    #   print(cfg.data_shape[1])  # width
                    resy = float((4 * x + 2) / cfg.data_shape[0] *
                                 (details[b][3] - details[b][1]) +
                                 details[b][1])
                    resx = float((4 * y + 2) / cfg.data_shape[1] *
                                 (details[b][2] - details[b][0]) +
                                 details[b][0])
                    #   print(resx,resy)
                    single_result.append(resx)
                    single_result.append(resy)
                    single_result.append(1)
                if len(single_result) != 0:
                    single_result_dict['image_id'] = int(ids[b])
                    single_result_dict['class'] = imgclass[b]
                    single_result_dict['keypoints'] = single_result
                    #     single_result_dict['score'] = float(det_scores[b])*v_score.mean()
                    full_result.append(single_result_dict)

    result_path = args.result
    if not isdir(result_path):
        mkdir_p(result_path)
    result_file = os.path.join(result_path, 'result.json')
    with open(result_file, 'w') as wf:
        json.dump(full_result, wf)
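parseHeatmap is not part of this listing. A minimal sketch of a thresholded local-maximum parser returning peaks in the same (row_indices, col_indices) tuple format consumed above (assumed behaviour; the original also appears to account for a 9-pixel border, which this sketch omits):

import numpy as np
from scipy.ndimage import maximum_filter

def parse_heatmap_sketch(heat, thresh=0.20):
    # Keep pixels that are a 3x3 local maximum and exceed the threshold
    local_max = (heat == maximum_filter(heat, size=3)) & (heat > thresh)
    return np.nonzero(local_max)

# ps = parse_heatmap_sketch(np.random.rand(64, 48), thresh=0.20)
# for k in range(len(ps[0])): y, x = ps[0][k], ps[1][k]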
Example #15
File: face_bbx.py    Project: dd-dos/LUVLi
    def __getitem__(self, index):

        if self.is_train:
            a = self.anno[self.train[index]]
        else:
            a = self.anno[self.valid[index]]

        img_path = os.path.join(self.img_folder, a['img_paths'])

        if a['pts_paths'] == "unknown.xyz":
            pts = a['pts']
        else:
            pts_path = os.path.join(self.img_folder, a['pts_paths'])

            if pts_path[-4:] == '.txt':
                pts = np.loadtxt(pts_path)  # L x 2
            else:
                pts = a['pts']

        pts = np.array(pts)
        # Assume all points are visible by default; visibility is stored as a
        # multi-class label (0 = self-occluded, 1 = visible, 2 = externally occluded)
        visible_multiclass = np.ones(pts.shape[0])

        if a['dataset'] == 'aflw_ours' or a['dataset'] == 'cofw_68':
            # The pts which are labelled -1 in both x and y are not visible points
            self_occluded_landmark = (pts[:, 0] == -1) & (pts[:, 1] == -1)
            external_occluded_landmark = (pts[:, 0] < -1) & (pts[:, 1] < -1)

            visible_multiclass[self_occluded_landmark] = 0
            visible_multiclass[external_occluded_landmark] = 2

            # Valid landmarks are those that are externally occluded or not occluded (i.e., not self-occluded)
            valid_landmark = (pts[:, 0] != -1) & (pts[:, 1] != -1)

            # The points which are partially occluded have both coordinates as negative but not -1
            # Make them positive
            pts = np.abs(pts)

            # valid_landmark is 0 for to be masked and 1 for not to be masked
            # mask is 1 for to be masked and 0 for not to be masked
            pts_masked = np.ma.array(pts,
                                     mask=np.column_stack(
                                         (1 - valid_landmark,
                                          1 - valid_landmark)))
            pts_mean = np.mean(pts_masked, axis=0)

            # Replace -1 by mean of valid landmarks. Otherwise taking min for
            # calculating geometric mean of the box can create issues later.
            pts[self_occluded_landmark] = pts_mean.data

            scale_mul_factor = 1.1

        elif a['dataset'] == "aflw" or a['dataset'] == "wflw":
            self_occluded_landmark = (pts[:, 0] <= 0) | (pts[:, 1] <= 0)
            valid_landmark = 1 - self_occluded_landmark
            visible_multiclass[self_occluded_landmark] = 0

            # valid_landmark is 0 for to be masked and 1 for not to be masked
            # mask is 1 for to be masked and 0 for not to be masked
            pts_masked = np.ma.array(pts,
                                     mask=np.column_stack(
                                         (1 - valid_landmark,
                                          1 - valid_landmark)))
            pts_mean = np.mean(pts_masked, axis=0)

            # Replace -1 by mean of valid landmarks. Otherwise taking min for
            # calculating geometric mean of the box can create issues later.
            pts[self_occluded_landmark] = pts_mean.data

            scale_mul_factor = 1.25

        else:
            scale_mul_factor = 1.1

        pts = torch.Tensor(pts)  # size is 68*2
        s = torch.Tensor([a['scale_provided_det']]) * scale_mul_factor
        c = torch.Tensor(a['objpos_det'])

        # For single-person pose estimation with a centered/scaled figure
        # the image in the original size
        img = imutils.load_image(img_path)

        r = 0
        s_rand = 1
        if self.is_train:  #data augmentation for training data
            s_rand = (1 + sample_from_bounded_gaussian(self.scale_factor / 2.))
            s = s * s_rand

            r = sample_from_bounded_gaussian(self.rot_factor / 2.)

            #print('s shape is ', s.size(), 's is ', s)
            #if np.random.uniform(0, 1, 1) <= 0.6:
            #    r = np.array([0])

            if self.use_flipping:
                # Flip
                if np.random.random() <= 0.5:
                    img = torch.from_numpy(HumanAug.fliplr(
                        img.numpy())).float()
                    pts = HumanAug.shufflelr(pts,
                                             width=img.size(2),
                                             dataset='face')
                    c[0] = img.size(2) - c[0]

            # Color
            img[0, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
            img[1, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)
            img[2, :, :].mul_(np.random.uniform(0.6, 1.4)).clamp_(0, 1)

            if self.use_occlusion:
                # Apply a random black occlusion
                # C x H x W
                patch_center_row = randint(1, img.size(1))
                patch_center_col = randint(1, img.size(2))

                patch_height = randint(1, img.size(1) // 2)
                patch_width = randint(1, img.size(2) // 2)

                row_min = max(0, patch_center_row - patch_height)
                row_max = min(img.size(1), patch_center_row + patch_height)
                col_min = max(0, patch_center_col - patch_width)
                col_max = min(img.size(2), patch_center_col + patch_width)

                img[:, row_min:row_max, col_min:col_max] = 0

        # Prepare points first
        pts_input_res = HumanAug.TransformPts(pts.numpy(), c.numpy(),
                                              s.numpy(), r, self.inp_res,
                                              self.std_size)

        # Some landmark points can go outside after transformation. Determine the
        # extra scaling required.
        # This can only be done for the training points. For validation, we do
        # not know the points location.
        if self.is_train and self.keep_pts_inside:
            # visible copy takes care of whether point is visible or not.
            visible_copy = visible_multiclass.copy()
            visible_copy[visible_multiclass > 1] = 1
            scale_down = get_ideal_scale(pts_input_res,
                                         self.inp_res,
                                         img_path,
                                         visible=visible_copy)
            s = s / scale_down
            s_rand = s_rand / scale_down
            pts_input_res = HumanAug.TransformPts(pts.numpy(), c.numpy(),
                                                  s.numpy(), r, self.inp_res,
                                                  self.std_size)

        if a['dataset'] == "aflw":
            meta_box_size = a['box_size']
            # We convert the meta_box size also to the input res. The meta_box
            # is not formed by the landmark point but is supplied externally.
            # We assume the meta_box as two points [meta_box_size, 0] and [0, 0]
            # apply the transformation on top of it
            temp = HumanAug.TransformPts(
                np.array([[meta_box_size, 0], [0, 0]]), c.numpy(), s.numpy(),
                r, self.inp_res, self.std_size)
            # Passed as array of 2 x 2
            # we only want the transformed distance between the points
            meta_box_size_input_res = np.linalg.norm(temp[1] - temp[0])
        else:
            meta_box_size_input_res = -10  # some invalid number

        # pts_input_res is in the size of 256 x 256
        # Bring down to 64 x 64 since finally heatmap will be 64 x 64
        pts_aug = pts_input_res * (1. * self.out_res / self.inp_res)

        # Prepare image
        inp = HumanAug.crop(imutils.im_to_numpy(img), c.numpy(), s.numpy(), r,
                            self.inp_res, self.std_size)
        inp_vis = inp
        inp = imutils.im_to_torch(inp).float()  # 3*256*256

        # Generate proxy ground truth heatmap
        heatmap, pts_aug = HumanPts.pts2heatmap(pts_aug,
                                                [self.out_res, self.out_res],
                                                sigma=self.sigma)
        heatmap = torch.from_numpy(heatmap).float()
        heatmap_mask = HumanPts.pts2mask(pts_aug, [self.out_res, self.out_res],
                                         bb=10)

        if self.is_train:
            return inp, heatmap, pts_input_res, heatmap_mask, s_rand, visible_multiclass, meta_box_size_input_res
        else:
            return inp, heatmap, pts_input_res, c, s, index, inp_vis, s_rand, visible_multiclass, meta_box_size_input_res
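As a small worked example of the masked-mean fill used above for self-occluded landmarks (pure NumPy, values made up for illustration):

import numpy as np

pts = np.array([[-1., -1.],   # self-occluded landmark (both coords are -1)
                [10., 12.],
                [30., 40.]])
self_occluded = (pts[:, 0] == -1) & (pts[:, 1] == -1)
valid = ~self_occluded

# mask is True where the value should be ignored by the mean
pts_masked = np.ma.array(pts, mask=np.column_stack((~valid, ~valid)))
pts_mean = pts_masked.mean(axis=0)   # -> [20., 26.], mean over the valid rows

pts[self_occluded] = pts_mean.data   # occluded rows replaced by the mean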
Example #16
    def generateSampleFace(self, idx):
        sf = self.scale_factor
        rf = self.rot_factor

        #print('Filename -->{}'.format(self.anno[idx][:-4] + '.jpg'))
        main_pts = sio.loadmat(self.anno[idx])

        pts = main_pts['pt3d_68'][0:2, :].transpose()

        #print(pts.dtype)

        pts = np.float32(pts)
        pts = torch.from_numpy(pts)

        #print('pts -> {}'.format(pts))

        pts = torch.clamp(pts, min=0)

        mins_ = torch.min(pts, 0)[0].view(2)  # min vals
        maxs_ = torch.max(pts, 0)[0].view(2)  # max vals

        c = torch.FloatTensor((maxs_[0] - (maxs_[0] - mins_[0]) / 2,
                               maxs_[1] - (maxs_[1] - mins_[1]) / 2))
        #print('min Values format -> {}'.format(mins_.dtype))
        #print('max Values format -> {}'.format(maxs_.dtype))

        c[1] -= ((maxs_[1] - mins_[1]) * 0.12)
        s = (maxs_[0] - mins_[0] + maxs_[1] - mins_[1]) / 195

        img = load_image(self.anno[idx][:-4] + '.jpg')

        r = 0
        if self.is_train:
            s = s * torch.randn(1).mul_(sf).add_(1).clamp(1 - sf, 1 + sf)[0]
            r = torch.randn(1).mul_(rf).clamp(
                -2 * rf, 2 * rf)[0] if random.random() <= 0.6 else 0

            if random.random() <= 0.5:
                img = torch.from_numpy(fliplr(img.numpy())).float()
                pts = shufflelr(pts, width=img.size(2), dataset='aflw2000')
                c[0] = img.size(2) - c[0]

            img[0, :, :].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1)
            img[1, :, :].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1)
            img[2, :, :].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1)

        # Prepare image and groundtruth map
        inp = HumanAug.crop(imutils.im_to_numpy(img), c.numpy(), s, r, 256,
                            200)

        inp = imutils.im_to_torch(inp).float()

        pts_input_res = HumanAug.TransformPts(pts.numpy(), c.numpy(), s, r,
                                              256, 200)
        pts_aug = pts_input_res * (1. * 64 / 256)

        # Generate ground truth
        heatmap, pts_aug = HumanPts.pts2heatmap(pts_aug, [64, 64], sigma=1)
        heatmap = torch.from_numpy(heatmap).float()

        # inp = crop(img, c, s, [256, 256], rot=r)
        # # inp = color_normalize(inp, self.mean, self.std)

        # tpts = pts.clone()
        # out = torch.zeros(self.nParts, 64, 64)
        # for i in range(self.nParts):
        #     if tpts[i, 0] > 0:
        #         tpts[i, 0:2] = to_torch(transform(tpts[i, 0:2] + 1, c, s, [64, 64], rot=r))
        #         out[i] = draw_labelmap(out[i], tpts[i] - 1, sigma=1)

        return inp, heatmap, pts, c, s, pts_input_res