Exemplo n.º 1
0
def estimate(tenOrig):
    """Run the cached 'bsds500' network on ``tenOrig`` at several shifts.

    The input is edge-padded by 32 pixels on every side, fed to the network
    once per shift value (100, 150, 200), and each result is centre-cropped
    back to the spatial size of ``tenOrig``. The element-wise maximum over
    all shifts is returned.

    Args:
        tenOrig: Image tensor; it is reshaped to ``(1, 3, H, W)`` after
            padding, so it must hold exactly three channels with height at
            index 1 and width at index 2.

    Returns:
        A CPU tensor with the same shape as ``tenOrig``.
    """
    global netNetwork

    # Build the network lazily on first use and keep it in the module global.
    if netNetwork is None:
        netNetwork = Network('bsds500').cuda().eval()
    # end

    intPadWidth = 32
    tenInput = Pad(intPadWidth, padding_mode='edge')(tenOrig).float()
    intHeight, intWidth = tenInput.shape[1], tenInput.shape[2]

    # The padded batch is identical for every shift, so move it to the GPU
    # once instead of once per iteration.
    # NOTE(review): assumes the network does not modify its input in place.
    tenBatch = tenInput.cuda().view(1, 3, intHeight, intWidth)
    fnCrop = CenterCrop(tenOrig.shape[1:3])

    tenOutput = torch.zeros(tenOrig.shape)
    for intShift in (100, 150, 200):
        tenInf = netNetwork(tenBatch, intShift)[0, :, :, :].cpu()
        tenOutput = torch.maximum(tenOutput, fnCrop(tenInf).float())

    return tenOutput
Exemplo n.º 2
0
def _int_pair(text):
    """Parse a command-line size such as ``"384,384"`` or ``"(384, 384)"``.

    Used as an argparse ``type``; argparse reports the ``ValueError`` raised
    here as an invalid argument value.
    """
    parts = text.strip().strip("()[]").split(",")
    if len(parts) != 2:
        raise ValueError(
            "expected two comma-separated integers, got {!r}".format(text))
    return int(parts[0]), int(parts[1])


def main():
    """Evaluate a pose-proposal checkpoint on a COCO-style keypoint set.

    Reads the annotation JSON given by ``--val_json``, keeps non-crowd
    annotations that have at least one keypoint, runs the network on every
    image that still has annotations, maps the ground truth into the padded
    and resized network input frame, and hands everything to ``evaluation``.

    ``--insize`` and ``--local_grid_size`` take two comma-separated integers
    (for example ``--insize 384,384``). When omitted, the values from
    ``config`` are used unchanged.
    """
    import argparse
    parser = argparse.ArgumentParser()
    # ``type=tuple`` would split the string "384,384" into single characters,
    # so the two size options use an explicit parser instead.
    parser.add_argument("--insize", default=config.insize, type=_int_pair)
    parser.add_argument("--local_grid_size",
                        default=config.local_grid_size,
                        type=_int_pair)
    parser.add_argument("--val_json", default=config.val_json, type=str)
    parser.add_argument("--image_root", default=config.image_root, type=str)
    parser.add_argument("--checkpoint", type=str, default="model/best288.pth")
    args = parser.parse_args()

    with open(args.val_json, "r") as json_file:
        val_json = json.load(json_file)

    # image id -> (file name, keypoints, bboxes, visibility masks)
    images = {}
    for image_info in val_json["images"]:
        images[int(image_info['id'])] = image_info['file_name'], [], [], []

    for anno in val_json['annotations']:  # walk over every annotation
        if anno['num_keypoints'] < 1 or anno['iscrowd'] != 0:
            continue
        d = np.array(anno['keypoints'], dtype='float32').reshape(-1, 3)

        entry = images[int(anno['image_id'])]
        # Columns are swapped so each keypoint is stored as (y, x).
        entry[1].append(d[:, [1, 0]])
        entry[2].append(np.asarray(anno['bbox'], dtype='float32'))  # person box
        # The third column equal to 2 marks a visible keypoint; the comparison
        # already yields a boolean array (``np.bool`` no longer exists in
        # current NumPy releases).
        entry[3].append(d[:, 2] == 2)

    # Only images that kept at least one annotation are evaluated.
    samples = [record for record in images.values() if record[1]]

    pose_proposal_net = model.poseprosalnet(KEYPOINT_NAMES, EDGES,
                                            args.local_grid_size, args.insize,
                                            args.checkpoint)
    # Load the weights onto the CPU first so a checkpoint saved from a GPU
    # also works on a CPU-only host; the model is moved to CUDA below when
    # it is available.
    pose_proposal_net.load_state_dict(
        torch.load(args.checkpoint, map_location='cpu'))
    pose_proposal_net.eval()
    CUDA = torch.cuda.is_available()
    if CUDA:
        pose_proposal_net.cuda()

    # [ground-truth keypoints, predicted humans, ground-truth boxes, visibility]
    pck_object = [[], [], [], []]

    with torch.no_grad():
        for filename, gt_keypoints, gt_bboxes, gt_visible in samples:
            # Close the file handle promptly and force three channels: the
            # pad fill colour and Normalize below both assume an RGB image.
            with Image.open(args.image_root + "/" + filename) as raw_image:
                image = raw_image.convert("RGB")
            oriW, oriH = image.size

            # Pad the shorter side on both ends towards a square image.
            # NOTE(review): the per-side pad is rounded up, so for an odd
            # size difference the padded side is presumably padedWH + 1 while
            # the rescaling below uses padedWH -- confirm whether this
            # one-pixel mismatch matters.
            padedWH = max(oriH, oriW)
            h_pad = (padedWH - oriH + 1) // 2
            w_pad = (padedWH - oriW + 1) // 2
            image = Pad((w_pad, h_pad), (123, 116, 103))(image)

            image = Resize(args.insize)(image)
            image = ToTensor()(image)
            image = Normalize(mean=[0.485, 0.456, 0.406],
                              std=[0.229, 0.224, 0.225])(image)
            image = image.unsqueeze(0)
            if CUDA:
                image = image.cuda()
            pre = pose_proposal_net(image)

            # ---- shift the ground truth by the padding offset ----
            paded_keypoints = [
                pad_keypoint(points, h_pad, w_pad) for points in gt_keypoints
            ]
            paded_bbox = []
            for x, y, bw, bh in gt_bboxes:
                [[y, x]] = pad_keypoint(np.array([[y, x]]), h_pad, w_pad)
                paded_bbox.append(np.array([x, y, bw, bh]))

            # ---- rescale the ground truth to the network input size ----
            new_keypoints = [
                resize_point(points, (padedWH, padedWH), args.insize)
                for points in paded_keypoints
            ]
            new_bbox = []
            for x, y, bw, bh in paded_bbox:
                [[y, x]] = resize_point(np.array([[y, x]]), (padedWH, padedWH),
                                        args.insize)
                bw *= args.insize[1] / padedWH
                bh *= args.insize[0] / padedWH
                new_bbox.append(np.array([x, y, bw, bh], dtype='float32'))

            humans = utils.get_humans_by_feature(pre, args.insize,
                                                 pose_proposal_net.outsize,
                                                 args.local_grid_size, 0.02)
            pck_object[0].append(new_keypoints)
            pck_object[1].append(humans)
            pck_object[2].append(new_bbox)
            pck_object[3].append(gt_visible)
        evaluation(pck_object)