示例#1
0
def get_model(model_path):
    """Build a RetinaFace detector and load trained weights into it.

    Args:
        model_path: Path to a checkpoint readable by ``torch.load`` that
            holds a state dict (parameter name -> tensor).

    Returns:
        The RetinaFace module, moved to the GPU and switched to eval mode.

    Raises:
        RuntimeError: Propagated from ``load_state_dict`` when the
            checkpoint does not cover every parameter of the model.
    """
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = create_retinaface(return_layers)

    # Load trained model
    retina_dict = RetinaFace.state_dict()
    # Deserialize onto the CPU so loading does not depend on the GPU layout
    # used when the checkpoint was written; the model is moved to CUDA below.
    pre_state_dict = torch.load(model_path, map_location='cpu')

    # Checkpoints written from a torch.nn.DataParallel wrapper prefix every
    # key with 'module.'.  Strip the prefix only when it is actually there so
    # checkpoints saved from a bare model load as well (the previous blind
    # ``k[7:]`` slice mangled such keys and matched nothing).
    prefix = 'module.'
    pretrained_dict = {}
    for key, value in pre_state_dict.items():
        name = key[len(prefix):] if key.startswith(prefix) else key
        if name in retina_dict:
            pretrained_dict[name] = value
    RetinaFace.load_state_dict(pretrained_dict)

    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()
    return RetinaFace
示例#2
0
def main():
    """Train RetinaFace and periodically evaluate and checkpoint it.

    Reads the command-line options from ``get_args()``:

    * ``data_path``: dataset root containing ``train/label.txt`` and
      ``val/label.txt``.
    * ``save_path``: directory for checkpoints; TensorBoard logs go to its
      ``log`` sub-directory.
    * ``batch``, ``epochs``: batch size and number of epochs.
    * ``verbose``: log the losses every ``verbose`` training batches.
    * ``eval_step`` / ``save_step``: evaluate / save every that many epochs.
    """
    args = get_args()

    # makedirs creates save_path and its 'log' child in one call, copes with
    # nested paths, and tolerates directories that already exist.
    log_path = os.path.join(args.save_path, 'log')
    os.makedirs(log_path, exist_ok=True)

    writer = SummaryWriter(log_dir=log_path)

    data_path = args.data_path
    train_path = os.path.join(data_path, 'train/label.txt')
    val_path = os.path.join(data_path, 'val/label.txt')

    # Alternative augmentation kept for reference:
    # transforms.Compose([RandomCroper(), RandomFlip()])
    dataset_train = TrainDataset(train_path,
                                 transform=transforms.Compose(
                                     [Resizer(), PadToSquare()]))
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=8,
                                  batch_size=args.batch,
                                  collate_fn=collater,
                                  shuffle=True)
    dataset_val = ValDataset(val_path,
                             transform=transforms.Compose(
                                 [Resizer(), PadToSquare()]))
    dataloader_val = DataLoader(dataset_val,
                                num_workers=8,
                                batch_size=args.batch,
                                collate_fn=collater)

    total_batch = len(dataloader_train)

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    retinaface = torchvision_model.create_retinaface(return_layers)

    retinaface = retinaface.cuda()
    retinaface = torch.nn.DataParallel(retinaface).cuda()

    optimizer = optim.Adam(retinaface.parameters(), lr=1e-3)
    # Alternatives tried previously:
    # optim.SGD(retinaface.parameters(), lr=1e-2, momentum=0.9, weight_decay=0.0005)
    # optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 30, 60], gamma=0.1)

    print('Start to train.')

    try:
        for epoch in range(args.epochs):
            # Re-enter training mode every epoch.
            # NOTE(review): presumably evaluate() below switches the model to
            # eval mode — confirm against eval_widerface.
            retinaface.train()

            # Training
            for iter_num, data in enumerate(dataloader_train):
                optimizer.zero_grad()
                classification_loss, bbox_regression_loss, ldm_regression_loss = retinaface(
                    [data['img'].cuda().float(), data['annot']])
                # Reduce each loss to a scalar with mean() before summing.
                # NOTE(review): presumably DataParallel returns one value per
                # replica here — confirm against the model's forward().
                classification_loss = classification_loss.mean()
                bbox_regression_loss = bbox_regression_loss.mean()
                ldm_regression_loss = ldm_regression_loss.mean()

                # Unweighted sum; a 1.0 / 1.0 / 0.5 weighting was tried before.
                loss = classification_loss + bbox_regression_loss + ldm_regression_loss

                loss.backward()
                optimizer.step()

                if iter_num % args.verbose == 0:
                    log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % (
                        epoch, args.epochs, iter_num, total_batch)
                    table_data = [
                        ['loss name', 'value'],
                        ['total_loss', str(loss.item())],
                        ['classification', str(classification_loss.item())],
                        ['bbox', str(bbox_regression_loss.item())],
                        ['landmarks', str(ldm_regression_loss.item())],
                    ]
                    log_str += AsciiTable(table_data).table
                    print(log_str)

                    # Use the true number of batches seen so far as the
                    # x-axis; the old "log count * verbose" value drifted
                    # whenever total_batch was not a multiple of verbose.
                    global_step = epoch * total_batch + iter_num
                    writer.add_scalar('losses:', loss.item(), global_step)
                    writer.add_scalar('class losses:',
                                      classification_loss.item(), global_step)
                    writer.add_scalar('box losses:',
                                      bbox_regression_loss.item(), global_step)
                    writer.add_scalar('landmark losses:',
                                      ldm_regression_loss.item(), global_step)

            # Eval
            if epoch % args.eval_step == 0:
                print('-------- RetinaFace Pytorch --------')
                print('Evaluating epoch {}'.format(epoch))
                recall, precision = eval_widerface.evaluate(
                    dataloader_val, retinaface)
                print('Recall:', recall)
                print('Precision:', precision)

                # epoch is already the epoch index; multiplying it by
                # eval_step again stretched the x-axis.
                writer.add_scalar('Recall:', recall, epoch)
                writer.add_scalar('Precision:', precision, epoch)

            # Save model.  The state dict comes from the DataParallel
            # wrapper, so its keys carry the 'module.' prefix.
            if (epoch + 1) % args.save_step == 0:
                checkpoint_name = 'model_epoch_{}.pt'.format(epoch + 1)
                torch.save(retinaface.state_dict(),
                           os.path.join(args.save_path, checkpoint_name))
    finally:
        # Flush TensorBoard events even when training is interrupted.
        writer.close()
示例#3
0
def _load_retinaface(model_path):
    """Create RetinaFace, load the checkpoint at ``model_path``, return it on GPU in eval mode."""
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    detector = torchvision_model.create_retinaface(return_layers)

    retina_dict = detector.state_dict()
    # Load onto the CPU first so the checkpoint's original device layout
    # does not matter; the model is moved to CUDA afterwards.
    pre_state_dict = torch.load(model_path, map_location='cpu')

    # Strip the 'module.' prefix (added by torch.nn.DataParallel) only when
    # present; the previous blind ``k[7:]`` slice broke un-prefixed keys.
    prefix = 'module.'
    pretrained_dict = {}
    for key, value in pre_state_dict.items():
        name = key[len(prefix):] if key.startswith(prefix) else key
        if name in retina_dict:
            pretrained_dict[name] = value
    detector.load_state_dict(pretrained_dict)

    detector = detector.cuda()
    detector.eval()
    return detector


def _frame_to_tensor(frame, scale):
    """Turn an HxWxC image array into a CxHxW tensor, shrunk by ``scale`` when it is not 1.0."""
    tensor = torch.from_numpy(frame).permute(2, 0, 1)
    if not scale == 1.0:
        size1 = int(tensor.shape[1] / scale)
        size2 = int(tensor.shape[2] / scale)
        tensor = resize(tensor.float(), (size1, size2))
    return tensor


def _tensor_to_image(tensor):
    """Turn a CxHxW tensor back into a C-contiguous HxWxC uint8 array for OpenCV."""
    array = tensor.cpu().permute(1, 2, 0).numpy()
    # OpenCV drawing functions write in place and need a contiguous buffer;
    # astype alone can preserve the permuted (non-contiguous) layout.
    return np.ascontiguousarray(array.astype(np.uint8))


def _expected_angle(logits, idx_tensor):
    """Convert per-bin logits to a continuous angle in degrees (expected bin index * 3 - 99)."""
    probabilities = F.softmax(logits, dim=1)
    return torch.sum(probabilities.data[0] * idx_tensor) * 3 - 99


def _annotate_faces(img, picked_boxes, picked_landmarks, pose_net,
                    transformations, idx_tensor, draw_landmarks):
    """Draw box, head-pose axes and (optionally) landmarks on ``img`` in place for every detection."""
    # One colour per landmark point, in landmark order.
    landmark_colors = ((0, 0, 255), (0, 255, 0), (255, 0, 0), (0, 255, 255),
                       (255, 255, 0))

    for j, boxes in enumerate(picked_boxes):
        if boxes is None:
            continue
        for box, landmark in zip(boxes, picked_landmarks[j]):
            # OpenCV drawing calls need plain Python ints.
            left, top = int(box[0]), int(box[1])
            right, bottom = int(box[2]), int(box[3])

            # Clip the crop window to the image bounds.
            x_min = max(left, 0)
            x_max = min(right, img.shape[1])
            y_min = max(top, 0)
            y_max = min(bottom, img.shape[0])

            # Skip boxes that are empty after clipping.
            if not x_min < x_max or not y_min < y_max:
                continue

            bbox_height = abs(y_max - y_min)

            # NOTE(review): frames read through OpenCV are BGR, while the
            # normalisation constants used by ``transformations`` look like
            # the usual RGB ImageNet statistics — confirm whether the crop
            # should be converted to RGB before it is given to Hopenet.
            face_img = Image.fromarray(img[y_min:y_max, x_min:x_max])
            face_img = transformations(face_img).unsqueeze(0).cuda()

            # Pure inference: no autograd graph needed.
            with torch.no_grad():
                yaw, pitch, roll = pose_net(face_img)

            # Get continuous predictions in degrees.
            yaw_predicted = _expected_angle(yaw, idx_tensor)
            pitch_predicted = _expected_angle(pitch, idx_tensor)
            roll_predicted = _expected_angle(roll, idx_tensor)

            utils.draw_axis(img,
                            yaw_predicted,
                            pitch_predicted,
                            roll_predicted,
                            tdx=(x_min + x_max) / 2,
                            tdy=(y_min + y_max) / 2,
                            size=bbox_height / 2)
            cv2.rectangle(img, (left, top), (right, bottom), (255, 0, 255),
                          thickness=2)

            if draw_landmarks:
                for k, color in enumerate(landmark_colors):
                    center = (int(landmark[2 * k]), int(landmark[2 * k + 1]))
                    cv2.circle(img, center, radius=1, color=color,
                               thickness=2)


def main():
    """Detect faces with RetinaFace and overlay Hopenet head-pose axes.

    Options come from ``get_args()``: ``f_model`` / ``p_model`` are the
    RetinaFace and Hopenet checkpoints, ``type`` selects ``'image'`` (reads
    ``image_path`` and shows the result) or anything else for video (reads
    ``video_path``, writes the annotated stream to ``out`` and shows it until
    the stream ends or ``q`` is pressed).  ``scale`` divides the input
    resolution before detection.  Landmarks are drawn in video mode only.

    Raises:
        IOError: If the input image cannot be read in image mode.
    """
    args = get_args()

    # Create retinaface
    RetinaFace = _load_retinaface(args.f_model)
    print('Retinaface create success.')

    # Create hopenet
    pose_net = hopenet.Hopenet(torchvision.models.resnet.Bottleneck,
                               [3, 4, 6, 3], 66)
    pose_net.load_state_dict(torch.load(args.p_model))
    pose_net = pose_net.cuda()
    pose_net.eval()
    print('Hopenet create success.')

    # Bin indices 0..65 used to take the expectation over the 66 output bins.
    idx_tensor = torch.FloatTensor(list(range(66))).cuda()

    transformations = transforms.Compose([
        # Resize supersedes the deprecated transforms.Scale.
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    if args.type == 'image':
        img = cv2.imread(args.image_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('Cannot read image: {}'.format(args.image_path))

        img = _frame_to_tensor(img, args.scale)
        input_img = img.unsqueeze(0).float().cuda()
        picked_boxes, picked_landmarks = eval_widerface.get_detections(
            input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

        img = _tensor_to_image(img)
        _annotate_faces(img, picked_boxes, picked_landmarks, pose_net,
                        transformations, idx_tensor, draw_landmarks=False)

        # Show once, after all detections are drawn.
        cv2.imshow('RetinaFace-Hopenet', img)
        cv2.waitKey()

    else:
        # Read video
        cap = cv2.VideoCapture(args.video_path)
        codec = cv2.VideoWriter_fourcc(*'MJPG')
        fps = 25.0
        # Created on the first frame so its frame size always matches what
        # is written, including when ``scale`` shrinks the frames.
        out = None

        try:
            while True:
                ret, frame = cap.read()

                if not ret:
                    # Also reached at the normal end of the stream.
                    print('Video open error.')
                    break

                img = _frame_to_tensor(frame, args.scale)
                input_img = img.unsqueeze(0).float().cuda()
                picked_boxes, picked_landmarks = eval_widerface.get_detections(
                    input_img,
                    RetinaFace,
                    score_threshold=0.5,
                    iou_threshold=0.3)

                img = _tensor_to_image(img)
                _annotate_faces(img, picked_boxes, picked_landmarks, pose_net,
                                transformations, idx_tensor,
                                draw_landmarks=True)

                if out is None:
                    # VideoWriter takes (width, height).
                    out = cv2.VideoWriter(args.out, codec, fps,
                                          (img.shape[1], img.shape[0]))
                out.write(img)

                cv2.imshow('RetinaFace-Pytorch', img)
                key = cv2.waitKey(1)
                if key == ord('q'):
                    print('Now quit.')
                    break
        finally:
            # Release the capture and writer even if a frame fails.
            cap.release()
            if out is not None:
                out.release()
    cv2.destroyAllWindows()
示例#4
0
def main():
    """Run RetinaFace on one image, draw the detections, save and show it.

    Options come from ``get_args()``: ``model_path`` is the checkpoint,
    ``image_path`` the input image, ``scale`` divides the input resolution
    before detection, and ``save_path`` is the directory that receives the
    annotated image under the input's file name.
    """
    args = get_args()
    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model
    retina_dict = RetinaFace.state_dict()
    # Load onto the CPU first so the checkpoint's original device layout does
    # not matter; the model is moved to CUDA afterwards.
    pre_state_dict = torch.load(args.model_path, map_location='cpu')
    # Strip the 'module.' prefix (added by torch.nn.DataParallel) only when
    # present; the previous blind ``k[7:]`` slice broke un-prefixed keys.
    prefix = 'module.'
    pretrained_dict = {}
    for key, value in pre_state_dict.items():
        name = key[len(prefix):] if key.startswith(prefix) else key
        if name in retina_dict:
            pretrained_dict[name] = value
    RetinaFace.load_state_dict(pretrained_dict)

    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # Read image and reorder HxWxC -> CxHxW for the network.
    img = skimage.io.imread(args.image_path)
    img = torch.from_numpy(img).permute(2, 0, 1)

    if not args.scale == 1.0:
        size1 = int(img.shape[1] / args.scale)
        size2 = int(img.shape[2] / args.scale)
        img = resize(img.float(), (size1, size2))

    input_img = img.unsqueeze(0).float().cuda()
    picked_boxes, picked_landmarks = eval_widerface.get_detections(
        input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

    np_img = img.cpu().permute(1, 2, 0).numpy()
    # skimage loads RGB while OpenCV draws/saves BGR.  COLOR_RGB2BGR performs
    # the same channel swap as the COLOR_BGR2RGB used before, but names the
    # actual direction.
    img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_RGB2BGR)

    # One colour per landmark point, in landmark order.
    landmark_colors = ((0, 0, 255), (0, 255, 0), (255, 0, 0), (0, 255, 255),
                       (255, 255, 0))

    for j, boxes in enumerate(picked_boxes):
        if boxes is None:
            continue
        for box, landmark in zip(boxes, picked_landmarks[j]):
            # OpenCV drawing calls need plain Python ints.
            top_left = (int(box[0]), int(box[1]))
            bottom_right = (int(box[2]), int(box[3]))
            cv2.rectangle(img, top_left, bottom_right, (0, 0, 255),
                          thickness=2)
            for k, color in enumerate(landmark_colors):
                center = (int(landmark[2 * k]), int(landmark[2 * k + 1]))
                cv2.circle(img, center, radius=1, color=color, thickness=2)

    image_name = os.path.basename(args.image_path)
    if args.save_path:
        # cv2.imwrite fails silently when the target directory is missing.
        os.makedirs(args.save_path, exist_ok=True)
    save_path = os.path.join(args.save_path, image_name)
    cv2.imwrite(save_path, img)
    cv2.imshow('RetinaFace-Pytorch', img)
    cv2.waitKey()
示例#5
0
def main():
    """Run RetinaFace on a video, draw the detections, save and show them.

    Options come from ``get_args()``: ``model_path`` is the checkpoint,
    ``video_path`` the input video, ``scale`` divides the frame resolution
    before detection, and ``save_path`` is the output video file.  Playback
    stops at the end of the stream or when ``q`` is pressed.
    """
    args = get_args()
    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model
    retina_dict = RetinaFace.state_dict()
    # Load onto the CPU first so the checkpoint's original device layout does
    # not matter; the model is moved to CUDA afterwards.
    pre_state_dict = torch.load(args.model_path, map_location='cpu')
    # Strip the 'module.' prefix (added by torch.nn.DataParallel) only when
    # present; the previous blind ``k[7:]`` slice broke un-prefixed keys.
    prefix = 'module.'
    pretrained_dict = {}
    for key, value in pre_state_dict.items():
        name = key[len(prefix):] if key.startswith(prefix) else key
        if name in retina_dict:
            pretrained_dict[name] = value
    RetinaFace.load_state_dict(pretrained_dict)

    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # Read video
    cap = cv2.VideoCapture(args.video_path)
    codec = cv2.VideoWriter_fourcc(*'MJPG')
    fps = 25.0
    # Created on the first frame so its frame size always matches what is
    # written, including when ``scale`` shrinks the frames.
    out = None

    # One colour per landmark point, in landmark order.
    landmark_colors = ((0, 0, 255), (0, 255, 0), (255, 0, 0), (0, 255, 255),
                       (255, 255, 0))

    try:
        while True:
            ret, frame = cap.read()

            if not ret:
                # Also reached at the normal end of the stream.
                print('Video open error.')
                break

            # HxWxC frame -> CxHxW tensor for the network.
            img = torch.from_numpy(frame).permute(2, 0, 1)

            if not args.scale == 1.0:
                size1 = int(img.shape[1] / args.scale)
                size2 = int(img.shape[2] / args.scale)
                img = resize(img.float(), (size1, size2))

            input_img = img.unsqueeze(0).float().cuda()

            picked_boxes, picked_landmarks = eval_widerface.get_detections(
                input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

            np_img = img.cpu().permute(1, 2, 0).numpy()
            # OpenCV draws in place and needs a C-contiguous uint8 buffer;
            # astype alone can preserve the permuted layout.
            img = np.ascontiguousarray(np_img.astype(np.uint8))

            for j, boxes in enumerate(picked_boxes):
                if boxes is None:
                    continue
                for box, landmark in zip(boxes, picked_landmarks[j]):
                    # OpenCV drawing calls need plain Python ints.
                    top_left = (int(box[0]), int(box[1]))
                    bottom_right = (int(box[2]), int(box[3]))
                    cv2.rectangle(img, top_left, bottom_right, (0, 0, 255),
                                  thickness=2)
                    for k, color in enumerate(landmark_colors):
                        center = (int(landmark[2 * k]),
                                  int(landmark[2 * k + 1]))
                        cv2.circle(img, center, radius=1, color=color,
                                   thickness=2)

            if out is None:
                # Write to the path given on the command line (the previous
                # code passed the literal string 'args.save_path').
                # VideoWriter takes (width, height).
                out = cv2.VideoWriter(args.save_path, codec, fps,
                                      (img.shape[1], img.shape[0]))
            out.write(img)

            cv2.imshow('RetinaFace-Pytorch', img)
            key = cv2.waitKey(1)
            if key == ord('q'):
                print('Now quit.')
                break
    finally:
        # Release the capture and writer even if a frame fails.
        cap.release()
        if out is not None:
            out.release()
    cv2.destroyAllWindows()