Пример #1
0
def compute_error(axis, cont_labels, preds, idx_tensor):
    """Return the summed absolute angle error for one pose axis.

    The prediction for each sample is the softmax-weighted sum of
    ``idx_tensor`` over dimension 1, mapped to an angle with ``* 3 - 99``.

    Args:
        axis: One of ``"yaw"``, ``"pitch"`` or ``"roll"``; selects column
            0, 1 or 2 of ``cont_labels`` respectively.
        cont_labels: Tensor indexed as ``cont_labels[:, column]`` holding the
            continuous ground-truth angles.
        preds: Network output for the chosen axis; only ``preds.data`` is
            read. Presumably per-bin logits (confirm against the model).
        idx_tensor: Tensor multiplied element-wise with the softmax output;
            presumably the bin indices (confirm against the caller).

    Returns:
        A tensor holding the *sum* (not the mean) of the absolute differences
        between predicted and ground-truth angles.

    Raises:
        IndexError: If ``axis`` is not one of the three supported names.
    """
    # Resolve the axis name to its column; the ``else`` arm of the loop runs
    # only when no name matched.
    for dim, known_axis in enumerate(("yaw", "pitch", "roll")):
        if axis == known_axis:
            break
    else:
        raise IndexError("{} is not in ['yaw', 'pitch', 'roll']".format(axis))

    target_angle = cont_labels[:, dim].float()

    probabilities = utils.softmax_temperature(preds.data, 1)
    expected_index = torch.sum(probabilities * idx_tensor, 1).cpu()
    predicted_angle = expected_index * 3 - 99

    return torch.sum(torch.abs(predicted_angle - target_angle))
Пример #2
0
        # NOTE(review): this is the interior of an evaluation loop whose
        # header is not part of this excerpt; `images`, `cont_labels`,
        # `model`, `gpu`, `idx_tensor`, `total` and the `*_error`
        # accumulators are defined outside it.

        # Move the batch to the CUDA device `gpu` and count its samples.
        images = Variable(images).cuda(gpu)
        total += cont_labels.size(0)

        # Continuous ground-truth angles: columns 0, 1, 2 of cont_labels are
        # used as yaw, pitch and roll respectively.
        label_yaw = cont_labels[:, 0].float()
        label_pitch = cont_labels[:, 1].float()
        label_roll = cont_labels[:, 2].float()

        # The model returns one output per angle.
        yaw, pitch, roll = model(images)

        # Binned predictions: index of the maximum along dim 1.
        # (These values are not used within this excerpt.)
        _, yaw_bpred = torch.max(yaw.data, 1)
        _, pitch_bpred = torch.max(pitch.data, 1)
        _, roll_bpred = torch.max(roll.data, 1)

        # Continuous predictions: softmax (temperature argument 1) ...
        yaw_predicted = utils.softmax_temperature(yaw.data, 1)
        pitch_predicted = utils.softmax_temperature(pitch.data, 1)
        roll_predicted = utils.softmax_temperature(roll.data, 1)

        # ... then the idx_tensor-weighted sum over dim 1, mapped to an angle
        # with `* 3 - 99`. Results are moved to the CPU so they can be
        # compared with the label tensors.
        yaw_predicted = torch.sum(yaw_predicted * idx_tensor, 1).cpu() * 3 - 99
        pitch_predicted = torch.sum(pitch_predicted * idx_tensor,
                                    1).cpu() * 3 - 99
        roll_predicted = torch.sum(roll_predicted * idx_tensor,
                                   1).cpu() * 3 - 99

        # Accumulate the summed (not averaged) absolute error per angle.
        yaw_error += torch.sum(torch.abs(yaw_predicted - label_yaw))
        pitch_error += torch.sum(torch.abs(pitch_predicted - label_pitch))
        roll_error += torch.sum(torch.abs(roll_predicted - label_roll))

        # Save first image in batch with pose cube or axis.
Пример #3
0
    def test_network(self):
        """Evaluate the model on ``self.test_loader`` and print the pose errors.

        For every batch the continuous yaw/pitch/roll predictions are computed
        as the softmax-weighted sum of ``idx_tensor`` mapped with ``* 3 - 99``,
        and the absolute differences to the ground truth are accumulated.
        When ``args.save_viz`` is set, the first image of each batch is
        written to ``output/images`` with the predicted axes drawn on it (and
        the per-sample error text when ``args.batch_size == 1``).

        Relies on the names ``model``, ``gpu``, ``idx_tensor``, ``args`` and
        ``utils`` being available in the enclosing scope.

        Returns:
            Tuple ``(yaw_error, pitch_error, roll_error)`` holding the *summed*
            absolute errors over all test samples (the per-sample means are
            only printed, not returned).
        """
        # print() call form: the statement form is a SyntaxError on Python 3
        # and the final report below already used the call form.
        print('Ready to test network.')

        total = 0

        yaw_error = .0
        pitch_error = .0
        roll_error = .0

        def predict_angle(logits):
            # Softmax-weighted sum of idx_tensor over dim 1, mapped to an
            # angle; moved to the CPU to be comparable with the labels.
            probabilities = utils.softmax_temperature(logits.data, 1)
            return torch.sum(probabilities * idx_tensor, 1).cpu() * 3 - 99

        for images, labels, cont_labels, names in self.test_loader:
            images = Variable(images).cuda(gpu)
            total += cont_labels.size(0)

            label_yaw = cont_labels[:, 0].float()
            label_pitch = cont_labels[:, 1].float()
            label_roll = cont_labels[:, 2].float()

            yaw, pitch, roll = model(images)

            # Continuous predictions
            yaw_predicted = predict_angle(yaw)
            pitch_predicted = predict_angle(pitch)
            roll_predicted = predict_angle(roll)

            # Summed absolute error of this batch; computed once and reused
            # for both the running totals and the visualisation text.
            batch_yaw_error = torch.sum(torch.abs(yaw_predicted - label_yaw))
            batch_pitch_error = torch.sum(
                torch.abs(pitch_predicted - label_pitch))
            batch_roll_error = torch.sum(
                torch.abs(roll_predicted - label_roll))

            yaw_error += batch_yaw_error
            pitch_error += batch_pitch_error
            roll_error += batch_roll_error

            # Save first image in batch with pose cube or axis.
            if args.save_viz:
                name = names[0]
                if args.dataset == 'BIWI':
                    cv2_img = cv2.imread(
                        os.path.join(args.data_dir, name + '_rgb.png'))
                else:
                    cv2_img = cv2.imread(
                        os.path.join(args.data_dir, name + '.jpg'))
                if args.batch_size == 1:
                    error_string = 'y %.2f, p %.2f, r %.2f' % \
                                   (batch_yaw_error,
                                    batch_pitch_error,
                                    batch_roll_error)
                    cv2.putText(cv2_img,
                                error_string, (30, cv2_img.shape[0] - 30),
                                fontFace=1,
                                fontScale=1,
                                color=(0, 0, 255),
                                thickness=2)
                utils.draw_axis(cv2_img,
                                yaw_predicted[0],
                                pitch_predicted[0],
                                roll_predicted[0],
                                tdx=200,
                                tdy=200,
                                size=100)
                cv2.imwrite(os.path.join('output/images', name + '.jpg'),
                            cv2_img)

        # An empty loader leaves total at 0; divide by 1 instead so the
        # report shows zero error rather than raising ZeroDivisionError.
        sample_count = total if total else 1
        print(
            'Test error in degrees of the model on the ' + str(total) +
            ' test images. Yaw: %.4f, Pitch: %.4f, Roll: %.4f' %
            (yaw_error / sample_count, pitch_error / sample_count,
             roll_error / sample_count))

        return yaw_error, pitch_error, roll_error
Пример #4
0
def _face_area(detection):
    """Return the box area of a detection laid out as (_, x1, y1, x2, y2)."""
    return (detection[3] - detection[1]) * (detection[4] - detection[2])


def _expand_face_box(x1, y1, x2, y2, k=0.35, b=30):
    """Grow a face box into a square crop region and shift it up by ``b``.

    The shorter side is padded to match the longer one, a margin of ``k``
    times the longer side is added on every side, and the result is moved up
    by ``b`` pixels. The box must have positive width and height.
    """
    w, h = x2 - x1, y2 - y1
    if h / w > 1:
        # Taller than wide: pad the width up to the height.
        pad_x = (h / w - 1) / 2 * w + k * h
        pad_y = k * h
    else:
        # Wider than tall (or square): pad the height up to the width.
        pad_x = k * w
        pad_y = (w / h - 1) / 2 * h + k * w
    return x1 - pad_x, y1 - (pad_y + b), x2 + pad_x, y2 + (pad_y - b)


def detect_headpose_resnet_112():
    """Run head-pose estimation on a video and write an annotated copy.

    For every frame the largest detected face is cropped (with an expanded
    square margin), passed through the ResNet model, and the predicted
    yaw/pitch are drawn on the frame together with the pose axes. Frames
    without a usable face are copied to the output unchanged.

    Differences from the previous revision:
      * the input path no longer starts with a stray space;
      * the box expansion is applied once to the largest face instead of
        once per detection (it used to compound with several faces);
      * the crop is clamped to the frame so negative coordinates cannot
        wrap around or produce an empty image;
      * the BGR->RGB conversion no longer targets a view of the frame that
        is written to the output video;
      * the capture and the writer are always released.
    """
    facedet = FaceDetection()
    # model path
    snapshot_path = "output/no_mask_03_gray_biwi_300w_lp_cosin_112/gray_biwi_300W_LP_squire_epoch_30.pkl"

    model = ResidualNet("ImageNet", 50, 66, "CBAM")
    saved_state_dict = torch.load(snapshot_path)
    # Drop the first 7 characters of every key; presumably the "module."
    # prefix of a DataParallel checkpoint (confirm against the trainer).
    model.load_state_dict(
        OrderedDict((key[7:], value)
                    for key, value in saved_state_dict.items()))
    model.cuda(0)
    model.eval()

    # transforms.Scale was renamed to Resize and later removed; prefer
    # Resize and fall back to Scale on old torchvision releases.
    resize = getattr(transforms, "Resize", None) or transforms.Scale
    transformations = transforms.Compose([
        resize(112),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.392, 0.392, 0.392],
                             std=[0.254, 0.254, 0.254])
    ])

    # Loop-invariant: the values 0..65 weighted by the softmax output.
    idx_tensor = torch.FloatTensor(list(range(66))).cuda(0)

    def predict_angle(logits):
        # Softmax-weighted sum of idx_tensor over dim 1, mapped to an angle.
        probabilities = utils.softmax_temperature(logits.data, 1)
        return torch.sum(probabilities * idx_tensor, 1).cpu() * 3 - 99

    cap = cv2.VideoCapture('test_data/20200522164730261_0.avi')
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('result_data/result_with_mask_resnet_112.avi',
                          fourcc, 20.0, (frame_width, frame_height))
    font = cv2.FONT_HERSHEY_SIMPLEX

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("finish")
                break

            rect = facedet.detection_image(frame)
            if len(rect) == 0:
                out.write(frame)
                continue

            # Keep only the largest face (first one wins on ties).
            face = max(rect, key=_face_area)
            x1, y1, x2, y2 = face[1], face[2], face[3], face[4]
            if x2 - x1 <= 0 or y2 - y1 <= 0:
                # Degenerate box: the expansion would divide by zero.
                out.write(frame)
                continue
            x1, y1, x2, y2 = _expand_face_box(x1, y1, x2, y2, k=0.35, b=30)

            # Clamp the crop to the frame; the unclamped box is still used
            # for drawing below.
            top = max(int(y1), 0)
            left = max(int(x1), 0)
            bottom = min(int(y2) + 1, frame.shape[0])
            right = min(int(x2) + 1, frame.shape[1])
            if bottom <= top or right <= left:
                out.write(frame)
                continue
            crop_img = frame[top:bottom, left:right]

            # change to rgb (into a new array, leaving the frame untouched)
            detect_img = Image.fromarray(
                cv2.cvtColor(crop_img, cv2.COLOR_BGR2RGB))

            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)),
                          (0, 255, 0), 1)

            # head pose
            with torch.no_grad():
                detect_img = transformations(detect_img)
                detect_img = detect_img.unsqueeze(dim=0)
                yaw, pitch, roll = model(detect_img.cuda(0))

            # Continuous predictions
            yaw_predicted = predict_angle(yaw)
            pitch_predicted = predict_angle(pitch)
            roll_predicted = predict_angle(roll)

            utils.draw_axis(frame,
                            yaw_predicted[0],
                            pitch_predicted[0],
                            roll_predicted[0],
                            tdx=(x2 - x1) // 2 + x1,
                            tdy=(y2 - y1) // 2 + y1,
                            size=50)
            pitch_text = "ptich:{:.4f}".format(pitch_predicted[0])
            yaw_text = "yaw:{:.4f}".format(yaw_predicted[0])
            cv2.putText(frame, str(pitch_text), (int(x1), int(y1) - 30),
                        font, 1, (0, 255, 0), 1)
            cv2.putText(frame, str(yaw_text), (int(x1), int(y1) - 60), font,
                        1, (0, 255, 0), 1)
            out.write(frame)
    finally:
        # Always finalise the output file and free the capture, even when a
        # frame fails to process or the input could not be opened.
        cap.release()
        out.release()

    print("finish")