# Exemplo n.º 1
# 0
def main():
    """Detect faces in a single image with RetinaFace, then save and show it."""
    args = arg_parse()

    # Build the torchvision-backbone RetinaFace; FPN taps layer2/3/4.
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained weights, stripping the 'module.' prefix left by DataParallel.
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)

    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # Read the input image and convert to a CHW tensor.
    img = cv2.imread(args.input)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = torch.from_numpy(img)
    img = img.permute(2, 0, 1)  # h, w, c -> c, h, w

    # Run detection on a batch of one.
    input_img = img.unsqueeze(0).float().cuda()
    picked_boxes, picked_scores = get_detections(input_img,
                                                 RetinaFace,
                                                 score_threshold=0.5,
                                                 iou_threshold=0.3)

    # Back to an HWC uint8 image for OpenCV drawing.
    # (The old bare `np_img.astype(int)` discarded its result - removed.)
    np_img = img.cpu().permute(1, 2, 0).numpy()
    img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

    font = cv2.FONT_HERSHEY_SIMPLEX

    for j, boxes in enumerate(picked_boxes):
        if boxes is not None:
            for box, score in zip(boxes, picked_scores[j]):
                # cv2 point arguments must be plain ints, not tensor scalars.
                x1, y1, x2, y2 = (int(box[0]), int(box[1]),
                                  int(box[2]), int(box[3]))
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255),
                              thickness=2)
                cv2.putText(img,
                            text=str(score.item())[:5],
                            org=(x1, y1),
                            fontFace=font,
                            fontScale=0.5,
                            thickness=1,
                            lineType=cv2.LINE_AA,
                            color=(255, 255, 255))

    # Save and display the annotated image.
    cv2.imwrite(args.output, img)
    cv2.imshow('RetinaFace-Pytorch', img)
    cv2.waitKey()
def main():
    """Detect faces and five landmarks in one image and display the result."""
    args = get_args()
    # Create torchvision model; FPN taps layer2/3/4.
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model, stripping the 'module.' prefix left by DataParallel.
    # (Removed a leftover debug print that dumped the conv1 weight tensor.)
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {k[7:]: v for k, v in pre_state_dict.items() if k[7:] in retina_dict}
    RetinaFace.load_state_dict(pretrained_dict)

    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # Read image into a CHW tensor.
    img = skimage.io.imread(args.image_path)
    img = torch.from_numpy(img)
    img = img.permute(2, 0, 1)

    # Optional down-scaling for speed.
    if not args.scale == 1.0:
        size1 = int(img.shape[1] / args.scale)
        size2 = int(img.shape[2] / args.scale)
        img = resize(img.float(), (size1, size2))

    input_img = img.unsqueeze(0).float().cuda()
    picked_boxes, picked_landmarks, picked_scores = eval_widerface.get_detections(
        input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

    # Back to an HWC uint8 image for OpenCV drawing.
    np_img = img.cpu().permute(1, 2, 0).numpy()
    img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

    font = cv2.FONT_HERSHEY_SIMPLEX

    for j, boxes in enumerate(picked_boxes):
        if boxes is not None:
            for box, landmark, score in zip(boxes, picked_landmarks[j], picked_scores[j]):
                # cv2 point arguments must be plain ints, not tensor scalars.
                x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), thickness=2)
                cv2.circle(img, (int(landmark[0]), int(landmark[1])), radius=1, color=(0, 0, 255), thickness=2)
                cv2.circle(img, (int(landmark[2]), int(landmark[3])), radius=1, color=(0, 255, 0), thickness=2)
                cv2.circle(img, (int(landmark[4]), int(landmark[5])), radius=1, color=(255, 0, 0), thickness=2)
                cv2.circle(img, (int(landmark[6]), int(landmark[7])), radius=1, color=(0, 255, 255), thickness=2)
                cv2.circle(img, (int(landmark[8]), int(landmark[9])), radius=1, color=(255, 255, 0), thickness=2)
                cv2.putText(img, text=str(score.item())[:5], org=(x1, y1), fontFace=font, fontScale=0.5,
                            thickness=1, lineType=cv2.LINE_AA, color=(255, 255, 255))

    image_name = args.image_path.split('/')[-1]
    save_path = os.path.join(args.save_path, image_name)
    # cv2.imwrite(save_path, img)  # enable to persist the annotated image
    cv2.imshow('RetinaFace-Pytorch', img)
    cv2.waitKey()
# Exemplo n.º 3
# 0
def main():
    """Train RetinaFace, logging losses to stdout and TensorBoard.

    Evaluates every ``args.eval_step`` epochs and checkpoints every
    ``args.save_step`` epochs into ``args.save_path``.
    """
    args = get_args()
    # makedirs(exist_ok=True) is race-free and creates missing parents,
    # unlike the exists-check-then-mkdir it replaces.
    os.makedirs(args.save_path, exist_ok=True)
    log_path = os.path.join(args.save_path, 'log')
    os.makedirs(log_path, exist_ok=True)

    writer = SummaryWriter(log_dir=log_path)

    data_path = args.data_path
    train_path = os.path.join(data_path, 'train/label.txt')
    val_path = os.path.join(data_path, 'val/label.txt')
    dataset_train = TrainDataset(train_path,
                                 transform=transforms.Compose(
                                     [Resizer(), PadToSquare()]))
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=8,
                                  batch_size=args.batch,
                                  collate_fn=collater,
                                  shuffle=True)
    dataset_val = ValDataset(val_path,
                             transform=transforms.Compose(
                                 [Resizer(), PadToSquare()]))
    dataloader_val = DataLoader(dataset_val,
                                num_workers=8,
                                batch_size=args.batch,
                                collate_fn=collater)

    total_batch = len(dataloader_train)

    # Create torchvision-backbone model; FPN taps layer2/3/4.
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    retinaface = torchvision_model.create_retinaface(return_layers)

    retinaface = retinaface.cuda()
    retinaface = torch.nn.DataParallel(retinaface).cuda()
    retinaface.training = True

    optimizer = optim.Adam(retinaface.parameters(), lr=1e-3)

    print('Start to train.')

    iteration = 0

    for epoch in range(args.epochs):
        retinaface.train()

        # Training
        for iter_num, data in enumerate(dataloader_train):
            optimizer.zero_grad()
            # The model returns the three loss heads when given
            # [images, annotations]; mean() folds DataParallel replicas.
            classification_loss, bbox_regression_loss, ldm_regression_loss = retinaface(
                [data['img'].cuda().float(), data['annot']])
            classification_loss = classification_loss.mean()
            bbox_regression_loss = bbox_regression_loss.mean()
            ldm_regression_loss = ldm_regression_loss.mean()

            loss = classification_loss + bbox_regression_loss + ldm_regression_loss

            loss.backward()
            optimizer.step()

            if iter_num % args.verbose == 0:
                log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % (
                    epoch, args.epochs, iter_num, total_batch)
                table_data = [['loss name', 'value'],
                              ['total_loss', str(loss.item())],
                              [
                                  'classification',
                                  str(classification_loss.item())
                              ], ['bbox',
                                  str(bbox_regression_loss.item())],
                              ['landmarks',
                               str(ldm_regression_loss.item())]]
                table = AsciiTable(table_data)
                log_str += table.table
                print(log_str)
                # Write the log to TensorBoard.
                writer.add_scalars(
                    'losses:', {
                        'total_loss': loss.item(),
                        'cls_loss': classification_loss.item(),
                        'bbox_loss': bbox_regression_loss.item(),
                        'ldm_loss': ldm_regression_loss.item()
                    }, iteration * args.verbose)
                iteration += 1

        # Eval
        if epoch % args.eval_step == 0:
            print('-------- RetinaFace Pytorch --------')
            print('Evaluating epoch {}'.format(epoch))
            recall, precision = eval_widerface.evaluate(
                dataloader_val, retinaface)
            print('Recall:', recall)
            print('Precision:', precision)

        # Save model
        if (epoch + 1) % args.save_step == 0:
            torch.save(retinaface.state_dict(),
                       args.save_path + '/model_epoch_{}.pt'.format(epoch + 1))

    # Flush any pending events to disk; previously the writer leaked.
    writer.close()
# Exemplo n.º 4
# 0
def main(nummmmmm=None):
    """Run RetinaFace 68-landmark detection live on the default webcam.

    Args:
        nummmmmm: unused; made optional for backward compatibility.
    """
    args = get_args()

    # Create torchvision model; FPN taps layer2/3/4.
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Load trained model, stripping the 'module.' prefix left by DataParallel.
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load('stage_5_68_full_model_epoch_121.pt',
                                map_location='cpu')
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace.to(device)
    # Inference mode: use running batch-norm stats / disable dropout.
    RetinaFace.eval()

    import time

    video = cv2.VideoCapture(0)
    while True:
        start = time.time()
        ret, img = video.read()
        # A failed grab (camera unplugged / busy) returns ret=False with
        # img=None; stop cleanly instead of crashing in cvtColor.
        if not ret:
            print('Failed to read frame from camera.')
            break
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = torch.from_numpy(img)
        img = img.permute(2, 0, 1)
        resized_img = img.float()
        # The input must live on the same device as the model; previously
        # it stayed on the CPU and crashed whenever CUDA was available.
        input_img = resized_img.float().unsqueeze(0).to(device)

        picked_boxes, picked_landmarks = eval_widerface.get_detections(
            input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)
        np_img = resized_img.cpu().permute(1, 2, 0).numpy()
        img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

        for j, boxes in enumerate(picked_boxes):
            if boxes is not None:
                for box, landmark in zip(boxes, picked_landmarks[j]):
                    # cv2 point arguments must be plain ints.
                    cv2.rectangle(img,
                                  (int(box[0]), int(box[1])),
                                  (int(box[2]), int(box[3])),
                                  (0, 0, 255),
                                  thickness=2)
                    # 68 landmarks stored flat as (x0, y0, x1, y1, ...).
                    for i in range(0, 136, 2):
                        cv2.circle(img,
                                   (int(landmark[i]), int(landmark[i + 1])),
                                   radius=1,
                                   color=(0, 0, 255),
                                   thickness=2)
        cv2.imshow('RetinaFace-Pytorch', img)
        print(time.time() - start)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release the camera handle; previously it leaked on exit.
    video.release()
    cv2.destroyAllWindows()
# Exemplo n.º 5
# 0
def main():
    """Train RetinaFace, tracking the best-precision checkpoint.

    Optionally resumes from ``args.model_path``; saves a "best" model
    whenever validation precision improves, plus periodic checkpoints.
    """
    precision_global = 0
    args = get_args()
    # makedirs(exist_ok=True) is race-free and creates missing parents,
    # unlike the exists-check-then-mkdir it replaces.
    os.makedirs(args.save_path, exist_ok=True)
    log_path = os.path.join(args.save_path, 'log')
    os.makedirs(log_path, exist_ok=True)

    writer = SummaryWriter(log_dir=log_path)

    data_path = args.data_path
    train_path = os.path.join(
        data_path,
        'retina-train-splitTrain.txt')
    val_path = os.path.join(
        data_path, "retina-train-splitTest.txt"
    )
    dataset_train = TrainDataset(train_path,
                                 transform=transforms.Compose(
                                     [Resizer(), PadToSquare()]))
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=6,
                                  batch_size=args.batch,
                                  collate_fn=collater,
                                  shuffle=True)
    dataset_val = ValDataset(val_path,
                             transform=transforms.Compose(
                                 [Resizer(), PadToSquare()]))
    dataloader_val = DataLoader(dataset_val,
                                num_workers=8,
                                batch_size=args.batch,
                                collate_fn=collater)

    total_batch = len(dataloader_train)

    # Create torchvision-backbone model; FPN taps layer2/3/4.
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    retinaface = torchvision_model.create_retinaface(return_layers)

    # Optionally resume from a checkpoint, stripping DataParallel's
    # 'module.' prefix from the stored keys.
    if (args.model_path is not None):
        retina_dict = retinaface.state_dict()
        pre_state_dict = torch.load(args.model_path)
        pretrained_dict = {
            k[7:]: v
            for k, v in pre_state_dict.items() if k[7:] in retina_dict
        }
        retinaface.load_state_dict(pretrained_dict)

    retinaface = retinaface.cuda()
    retinaface = torch.nn.DataParallel(retinaface).cuda()
    retinaface.training = True

    optimizer = optim.Adam(retinaface.parameters(), lr=1e-3)

    print('Start to train.')

    iteration = 0

    for epoch in range(args.epochs):
        retinaface.train()

        # Training
        for iter_num, data in enumerate(dataloader_train):
            optimizer.zero_grad()
            # The model returns the three loss heads when given
            # [images, annotations]; mean() folds DataParallel replicas.
            classification_loss, bbox_regression_loss, ldm_regression_loss = retinaface(
                [data['img'].cuda().float(), data['annot']])
            classification_loss = classification_loss.mean()
            bbox_regression_loss = bbox_regression_loss.mean()
            ldm_regression_loss = ldm_regression_loss.mean()

            loss = classification_loss + bbox_regression_loss + ldm_regression_loss

            loss.backward()
            optimizer.step()

            if iter_num % args.verbose == 0:
                log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % (
                    epoch, args.epochs, iter_num, total_batch)
                table_data = [['loss name', 'value'],
                              ['total_loss', str(loss.item())],
                              [
                                  'classification',
                                  str(classification_loss.item())
                              ], ['bbox',
                                  str(bbox_regression_loss.item())],
                              ['landmarks',
                               str(ldm_regression_loss.item())]]
                table = AsciiTable(table_data)
                log_str += table.table
                print(log_str)
                # write the log to tensorboard
                writer.add_scalar('losses:', loss.item(),
                                  iteration * args.verbose)
                writer.add_scalar('class losses:', classification_loss.item(),
                                  iteration * args.verbose)
                writer.add_scalar('box losses:', bbox_regression_loss.item(),
                                  iteration * args.verbose)
                writer.add_scalar('landmark losses:',
                                  ldm_regression_loss.item(),
                                  iteration * args.verbose)
                iteration += 1

        # Eval
        if epoch % args.eval_step == 0:
            print('-------- RetinaFace Pytorch --------')
            print('Evaluating epoch {}'.format(epoch))
            recall, precision = eval_widerface.evaluate(
                dataloader_val, retinaface)
            # Keep a separate "best" checkpoint whenever precision improves.
            if (precision_global < precision):
                precision_global = precision
                torch.save(
                    retinaface.state_dict(), args.save_path +
                    '/model_Best_epoch_{}.pt'.format(epoch + 1))
            print('Recall:', recall)
            print('Precision:', precision, "best Precision:", precision_global)

            writer.add_scalar('Recall:', recall, epoch * args.eval_step)
            writer.add_scalar('Precision:', precision, epoch * args.eval_step)

        # Save model
        if (epoch + 1) % args.save_step == 0:
            torch.save(retinaface.state_dict(),
                       args.save_path + '/model_epoch_{}.pt'.format(epoch + 1))

    writer.close()
# Exemplo n.º 6
# 0
def main():
    """Run RetinaFace over a video file, draw detections, write and show frames."""
    args = get_args()
    # Create torchvision model; FPN taps layer2/3/4.
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model, stripping the 'module.' prefix left by DataParallel.
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)

    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # Open the input video.
    cap = cv2.VideoCapture(args.video_path)

    codec = cv2.VideoWriter_fourcc(*'MJPG')

    width = int(cap.get(3))   # CAP_PROP_FRAME_WIDTH
    height = int(cap.get(4))  # CAP_PROP_FRAME_HEIGHT

    cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
    fps = 25.0

    # BUG FIX: the destination used to be the literal string 'args.save_path'
    # (quoted), so output always went to a file literally named that.
    # NOTE(review): if args.scale != 1.0 the annotated frames are resized and
    # no longer match (width, height) — the writer may drop them; confirm.
    out = cv2.VideoWriter(args.save_path, codec, fps, (width, height))

    font = cv2.FONT_HERSHEY_SIMPLEX

    while True:
        ret, img = cap.read()

        if not ret:
            # Also reached at normal end-of-stream, not only on open errors.
            print('Video open error.')
            break

        img = torch.from_numpy(img)
        img = img.permute(2, 0, 1)

        # Optional down-scaling for speed.
        if not args.scale == 1.0:
            size1 = int(img.shape[1] / args.scale)
            size2 = int(img.shape[2] / args.scale)
            img = resize(img.float(), (size1, size2))

        input_img = img.unsqueeze(0).float().cuda()
        picked_boxes, picked_landmarks, picked_scores = eval_widerface.get_detections(
            input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

        # Back to an HWC uint8 image for OpenCV drawing.
        np_img = img.cpu().permute(1, 2, 0).numpy()
        img = np_img.astype(np.uint8)

        for j, boxes in enumerate(picked_boxes):
            if boxes is not None:
                for box, landmark, score in zip(boxes, picked_landmarks[j],
                                                picked_scores[j]):
                    # cv2 point arguments must be plain ints, not tensor scalars.
                    x1, y1, x2, y2 = (int(box[0]), int(box[1]),
                                      int(box[2]), int(box[3]))
                    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255),
                                  thickness=2)
                    # Five facial landmarks, one colour each.
                    cv2.circle(img, (int(landmark[0]), int(landmark[1])),
                               radius=1, color=(0, 0, 255), thickness=2)
                    cv2.circle(img, (int(landmark[2]), int(landmark[3])),
                               radius=1, color=(0, 255, 0), thickness=2)
                    cv2.circle(img, (int(landmark[4]), int(landmark[5])),
                               radius=1, color=(255, 0, 0), thickness=2)
                    cv2.circle(img, (int(landmark[6]), int(landmark[7])),
                               radius=1, color=(0, 255, 255), thickness=2)
                    cv2.circle(img, (int(landmark[8]), int(landmark[9])),
                               radius=1, color=(255, 255, 0), thickness=2)
                    cv2.putText(img,
                                text=str(score.item())[:5],
                                org=(x1, y1),
                                fontFace=font,
                                fontScale=0.5,
                                thickness=1,
                                lineType=cv2.LINE_AA,
                                color=(255, 255, 255))

        out.write(img)
        cv2.imshow('RetinaFace-Pytorch', img)
        key = cv2.waitKey(1)
        if key == ord('q'):
            print('Now quit.')
            break

    cap.release()
    out.release()
    cv2.destroyAllWindows()
# Exemplo n.º 7
# 0
def main():
    """Batch-detect faces for every image listed in a text file.

    For each input line, writes the line, then the detection count, then one
    "x y w h score" record per detected box to out.txt.
    """
    args = get_args()
    # Create torchvision model; FPN taps layer2/3/4.
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model, stripping the 'module.' prefix left by DataParallel.
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {k[7:]: v for k, v in pre_state_dict.items() if k[7:] in retina_dict}
    RetinaFace.load_state_dict(pretrained_dict)

    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # Context managers guarantee both files are closed even if an image fails.
    with open("out.txt", 'w') as o, open(args.text_path, 'r') as f:
        for line in f:
            o.write(line)
            print(line)
            line = line.rstrip()
            # '# path' lines carry the path after the marker; others need .jpg.
            if line.startswith('#'):
                path = args.image_path_prefix + line[2:].replace('/', '\\')
            else:
                path = args.image_path_prefix + line.replace('/', '\\') + ".jpg"

            if not os.path.exists(path):
                continue

            # Read as grayscale, expand to 3-channel, rescale to uint8.
            img1 = skimage.io.imread(path, as_gray=True).astype(np.float32)
            img2 = cv2.cvtColor(img1, cv2.COLOR_GRAY2BGR)
            img = (img2 * 255.0).astype(np.uint8)

            img = torch.from_numpy(img)
            img = img.permute(2, 0, 1)

            # Optional down-scaling for speed.
            if not args.scale == 1.0:
                size1 = int(img.shape[1] / args.scale)
                size2 = int(img.shape[2] / args.scale)
                img = resize(img.float(), (size1, size2))

            input_img = img.unsqueeze(0).float().cuda()
            picked_boxes, picked_landmarks, picked_scores = eval_widerface.get_detections(
                input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)
            # Record the number of detections for this image.
            if picked_boxes is None or picked_boxes[0] is None:
                o.write("0" + '\n')
            else:
                o.write(str(picked_boxes[0].shape[0]) + '\n')

            # Back to an HWC uint8 image for OpenCV drawing.
            np_img = img.cpu().permute(1, 2, 0).numpy()
            img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            font = cv2.FONT_HERSHEY_SIMPLEX

            for j, boxes in enumerate(picked_boxes):
                if boxes is not None:
                    for box, landmark, score in zip(boxes, picked_landmarks[j], picked_scores[j]):
                        # Convert the box to numpy ONCE (previously repeated 8x
                        # per box) and emit an "x y w h score" record.
                        b = box.cpu().detach().numpy()
                        record = (str(b[0]) + " " + str(b[1]) + " " +
                                  str(b[2] - b[0]) + " " + str(b[3] - b[1]) +
                                  " " + str(score.item()) + '\n')
                        print(record)
                        o.write(record)
                        # cv2 point arguments must be plain ints.
                        x1, y1, x2, y2 = int(b[0]), int(b[1]), int(b[2]), int(b[3])
                        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), thickness=2)
                        cv2.circle(img, (int(landmark[0]), int(landmark[1])), radius=1, color=(0, 0, 255), thickness=2)
                        cv2.circle(img, (int(landmark[2]), int(landmark[3])), radius=1, color=(0, 255, 0), thickness=2)
                        cv2.circle(img, (int(landmark[4]), int(landmark[5])), radius=1, color=(255, 0, 0), thickness=2)
                        cv2.circle(img, (int(landmark[6]), int(landmark[7])), radius=1, color=(0, 255, 255), thickness=2)
                        cv2.circle(img, (int(landmark[8]), int(landmark[9])), radius=1, color=(255, 255, 0), thickness=2)
                        cv2.putText(img, text=str(score.item())[:5], org=(x1, y1), fontFace=font, fontScale=0.5,
                                    thickness=1, lineType=cv2.LINE_AA, color=(255, 255, 255))

            image_name = path.split('\\')[-1]
            save_path = os.path.join(args.save_path, image_name)
            # cv2.imwrite(save_path, img)  # enable to persist annotated images
            # cv2.imshow('RetinaFace-Pytorch', img)
            # cv2.waitKey()
def main(nummmmmm=None):
    """Visualize RetinaFace 68-landmark detections on training-set samples.

    Renders samples 100-149 of the WIDER FACE training set and writes each
    annotated image to ./RetinaFace-Pytorch{idx}.jpg.

    Args:
        nummmmmm: unused; made optional for backward compatibility.
    """
    args = get_args()

    # Create torchvision model; FPN taps layer2/3/4.
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Load trained model, stripping the 'module.' prefix left by DataParallel.
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(
        '/versa/elvishelvis/RetinaYang/out/stage_5_68_full_model_epoch_51.pt')
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace.to(device)
    # Inference mode: use running batch-norm stats / disable dropout.
    RetinaFace.eval()

    import time

    dataset_val = TrainDataset('./widerface/train/label.txt',
                               transform=transforms.Compose(
                                   [Resizer(640), PadToSquare()]))
    for qq in range(100, 150):
        # Dataset sample is an HWC tensor; convert to CHW for the network.
        img = dataset_val[qq]['img']
        img = img.permute(2, 0, 1)
        resized_img = img.float()
        input_img = resized_img.unsqueeze(0).to(device)
        start = time.time()
        picked_boxes, picked_landmarks = eval_widerface.get_detections(
            input_img, RetinaFace, score_threshold=0.9, iou_threshold=0.2)
        print(time.time() - start)
        # Back to an HWC uint8 image for OpenCV drawing.
        np_img = resized_img.cpu().permute(1, 2, 0).numpy()
        img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

        for j, boxes in enumerate(picked_boxes):
            if boxes is not None:
                for box, landmark in zip(boxes, picked_landmarks[j]):
                    # cv2 point arguments must be plain ints.
                    cv2.rectangle(img,
                                  (int(box[0]), int(box[1])),
                                  (int(box[2]), int(box[3])),
                                  (0, 0, 255),
                                  thickness=2)
                    # 68 landmarks stored flat as (x0, y0, x1, y1, ...).
                    for i in range(0, 136, 2):
                        cv2.circle(img,
                                   (int(landmark[i]), int(landmark[i + 1])),
                                   radius=1,
                                   color=(0, 0, 255),
                                   thickness=2)

        # (Removed the unused image_name/save_path computation that read
        # args.image_path, which this dataset-driven script never uses.)
        cv2.imwrite('./RetinaFace-Pytorch{}.jpg'.format(qq),
                    cv2.resize(img, (640, 640)))
# Exemplo n.º 9
# 0
def _annotate_head_pose(img, boxes, landmarks, pose_net, idx_tensor,
                        transformations, draw_landmarks=False):
    """Estimate and draw head pose for every detected face on one frame.

    For each box: crop the face, run Hopenet to get yaw/pitch/roll bin
    logits, convert them to continuous degrees, then draw the pose axes
    and the magenta bounding box (and optionally the 5 facial landmarks).

    Mutates ``img`` (uint8 HxWx3 OpenCV image) in place.
    """
    # One distinct colour per landmark point (presumably eyes, nose,
    # mouth corners -- TODO confirm landmark order).
    landmark_colors = [(0, 0, 255), (0, 255, 0), (255, 0, 0),
                       (0, 255, 255), (255, 255, 0)]
    for box, landmark in zip(boxes, landmarks):
        # Crop coordinates, clipped to the image bounds.
        x_min = max(int(box[0]), 0)
        y_min = max(int(box[1]), 0)
        x_max = min(int(box[2]), img.shape[1])
        y_max = min(int(box[3]), img.shape[0])
        if not x_min < x_max or not y_min < y_max:
            continue  # box degenerated to an empty crop after clipping

        bbox_height = abs(y_max - y_min)
        face_img = Image.fromarray(img[y_min:y_max, x_min:x_max])

        # Normalize the crop and add the batch dimension.
        face_img = transformations(face_img)
        face_img = face_img.unsqueeze(0).cuda()

        yaw, pitch, roll = pose_net(face_img)

        # Each head is a distribution over 66 angle bins; the continuous
        # angle is the expectation over bins mapped to [-99, 99] degrees
        # (3 degrees per bin).  dim=1 is the bin axis (the original called
        # the deprecated F.softmax without an explicit dim).
        yaw_predicted = F.softmax(yaw, dim=1)
        pitch_predicted = F.softmax(pitch, dim=1)
        roll_predicted = F.softmax(roll, dim=1)
        yaw_predicted = torch.sum(
            yaw_predicted.data[0] * idx_tensor) * 3 - 99
        pitch_predicted = torch.sum(
            pitch_predicted.data[0] * idx_tensor) * 3 - 99
        roll_predicted = torch.sum(
            roll_predicted.data[0] * idx_tensor) * 3 - 99

        utils.draw_axis(img,
                        yaw_predicted,
                        pitch_predicted,
                        roll_predicted,
                        tdx=(x_min + x_max) / 2,
                        tdy=(y_min + y_max) / 2,
                        size=bbox_height / 2)
        cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]),
                      (255, 0, 255),
                      thickness=2)
        if draw_landmarks:
            for i, color in enumerate(landmark_colors):
                cv2.circle(img, (landmark[2 * i], landmark[2 * i + 1]),
                           radius=1,
                           color=color,
                           thickness=2)


def _to_network_input(frame, scale):
    """Convert an HWC uint8 frame into (CHW tensor, 1xCHW float cuda batch).

    When ``scale`` != 1.0 the frame is downscaled by that factor first.
    Returns the CHW tensor (used later for drawing) and the batched input.
    """
    img = torch.from_numpy(frame)
    img = img.permute(2, 0, 1)  # HWC -> CHW
    if scale != 1.0:
        size1 = int(img.shape[1] / scale)
        size2 = int(img.shape[2] / scale)
        img = resize(img.float(), (size1, size2))
    return img, img.unsqueeze(0).float().cuda()


def main():
    """Detect faces with RetinaFace, then estimate head pose with Hopenet.

    ``--type image`` annotates a single image and shows it; any other type
    reads ``args.video_path`` frame by frame, writes the annotated stream
    to ``args.out`` (MJPG, 25 fps) and previews it until 'q' is pressed.
    """
    args = get_args()

    # RetinaFace on a torchvision FPN over layer2..layer4.
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # The checkpoint was saved from a DataParallel wrapper: strip the
    # leading "module." prefix (k[7:]) and keep only keys this model knows.
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.f_model)
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()
    print('Retinaface create success.')

    # Hopenet head-pose network: ResNet-50 layout, 66 angle bins per axis.
    Hopenet = hopenet.Hopenet(torchvision.models.resnet.Bottleneck,
                              [3, 4, 6, 3], 66)
    Hopenet.load_state_dict(torch.load(args.p_model))
    Hopenet = Hopenet.cuda()
    Hopenet.eval()
    print('Hopenet create success.')

    # Bin indices used to compute the expected angle from the softmax.
    idx_tensor = torch.FloatTensor(list(range(66))).cuda()

    transformations = transforms.Compose([
        # transforms.Scale is the deprecated alias of Resize.
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    if args.type == 'image':
        img, input_img = _to_network_input(cv2.imread(args.image_path),
                                           args.scale)
        picked_boxes, picked_landmarks = eval_widerface.get_detections(
            input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

        # Back to a uint8 HWC image for OpenCV drawing.
        img = img.cpu().permute(1, 2, 0).numpy().astype(np.uint8)

        for j, boxes in enumerate(picked_boxes):
            if boxes is not None:
                _annotate_head_pose(img, boxes, picked_landmarks[j], Hopenet,
                                    idx_tensor, transformations)

        # Show once, after all detections are drawn (the original showed the
        # frame from inside the per-batch-item loop).
        cv2.imshow('RetinaFace-Hopenet', img)
        key = cv2.waitKey()

    else:
        # Read video
        cap = cv2.VideoCapture(args.video_path)
        codec = cv2.VideoWriter_fourcc(*'MJPG')

        width = int(cap.get(3))
        height = int(cap.get(4))
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
        fps = 25.0

        out = cv2.VideoWriter(args.out, codec, fps, (width, height))

        while True:
            ret, frame = cap.read()
            if not ret:
                print('Video open error.')
                break

            img, input_img = _to_network_input(frame, args.scale)
            picked_boxes, picked_landmarks = eval_widerface.get_detections(
                input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

            img = img.cpu().permute(1, 2, 0).numpy().astype(np.uint8)

            for j, boxes in enumerate(picked_boxes):
                if boxes is not None:
                    _annotate_head_pose(img, boxes, picked_landmarks[j],
                                        Hopenet, idx_tensor, transformations,
                                        draw_landmarks=True)

            out.write(img)
            cv2.imshow('RetinaFace-Pytorch', img)
            if cv2.waitKey(1) == ord('q'):
                print('Now quit.')
                break

        cap.release()
        out.release()
    cv2.destroyAllWindows()
def main():
    """Fine-tune RetinaFace on WIDER FACE with the landmark branch frozen.

    Resumes from a hard-coded checkpoint, trains with a (computed but see
    note below) cosine lr schedule, evaluates every ``args.eval_step``
    epochs with results appended to "aaa.txt", and saves a checkpoint
    every ``args.save_step`` epochs.
    """
    args = get_args()
    # Ensure the output and log directories exist.
    if not os.path.exists(args.save_path):
        os.mkdir(args.save_path)
    log_path = os.path.join(args.save_path, 'log')
    if not os.path.exists(log_path):
        os.mkdir(log_path)

    data_path = args.data_path
    # Heavy augmentation pipeline for training; note the label path is
    # hard-coded rather than derived from data_path.
    # dataset_train = TrainDataset(train_path,transform=transforms.Compose([RandomCroper(),()]))
    dataset_train = TrainDataset('./widerface/train/label.txt',
                                 transform=transforms.Compose([
                                     RandomErasing(),
                                     RandomFlip(),
                                     Rotate(),
                                     Color(),
                                     Resizer(),
                                     PadToSquare()
                                 ]))
    # dataset_train = TrainDataset('./widerface/train/label.txt',transform=transforms.Compose([Resizer(),PadToSquare()]))
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=8,
                                  batch_size=args.batch,
                                  collate_fn=collater,
                                  shuffle=True)
    # NOTE(review): validation reuses the *training* label file, so the
    # reported recall/precision are on seen data -- confirm intended.
    # dataset_val = ValDataset(val_path,transform=transforms.Compose([RandomCroper()]))
    dataset_val = TrainDataset('./widerface/train/label.txt',
                               transform=transforms.Compose(
                                   [Resizer(640), PadToSquare()]))
    dataloader_val = DataLoader(dataset_val,
                                num_workers=8,
                                batch_size=args.batch,
                                collate_fn=collater)

    total_batch = len(dataloader_train)

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    retinaface = torchvision_model.create_retinaface(return_layers)
    retinaface_ = retinaface.cuda()
    retinaface = torch.nn.DataParallel(retinaface_).cuda()
    retinaface.training = True
    base_lr = 1e-7

    # pre_train = torch.load('network.torch')
    # cur=retinaface.state_dict()
    # for k, v in cur.items():
    #     if k[12:] in pre_train:
    #         print(k[12:])
    #         cur[k]=pre_train[k[12:]]
    # retinaface.load_state_dict(cur)
    # Resume from a machine-specific absolute checkpoint path.
    retinaface.load_state_dict(
        torch.load(
            "/versa/elvishelvis/RetinaYang/out/stage_5_68_full_model_epoch_121.pt"
        ))
    lr = base_lr
    # optimizer=torch.optim.Adam(retinaface.parameters(),lr=lr)
    # fix encoder
    # Freeze every parameter belonging to the landmark branch.
    for name, value in retinaface.named_parameters():
        if 'Landmark' in name:
            value.requires_grad = False
    # Cosine-annealed learning-rate curve over args.epochs.
    lr_cos = lambda n: 0.5 * (1 + np.cos((n) /
                                         (args.epochs) * np.pi)) * base_lr
    # NOTE(review): "params" are the trainable tensors, "body" the frozen
    # ones; Adam is still handed the frozen group (at 3x lr). Frozen params
    # receive no gradients, but confirm the grouping/lr split is intended.
    params = filter(lambda p: p.requires_grad == True, retinaface.parameters())
    body = filter(lambda p: p.requires_grad == False, retinaface.parameters())
    optimizer = torch.optim.Adam([{
        'params': body,
        'lr': lr * 3
    }, {
        'params': params,
        'lr': lr
    }])
    #evaluation the current model
    if (args.training == False):
        print("not pretrain")
        recall, precision, landmakr, miss = eval_widerface.evaluate(
            dataloader_val, retinaface)
        print('Recall:', recall)
        print('Precision:', precision)
        print("landmark: ", str(landmakr))
        print("miss: " + str(miss))
        return
    ##
    print('Start to train.')

    epoch_loss = []
    iteration = 0
    retinaface = retinaface.cuda()
    for epoch in range(args.epochs):
        # NOTE(review): the recomputed lr is never written back into
        # optimizer.param_groups, so Adam keeps its initial rates -- confirm.
        lr = lr_cos(epoch)

        retinaface.train()

        # Training
        for iter_num, data in enumerate(dataloader_train):
            optimizer.zero_grad()
            classification_loss, bbox_regression_loss, ldm_regression_loss = retinaface(
                [data['img'].cuda().float(), data['annot']])
            # Reduce the per-GPU losses returned under DataParallel.
            classification_loss = classification_loss.mean()
            bbox_regression_loss = bbox_regression_loss.mean()
            ldm_regression_loss = ldm_regression_loss.mean()

            # loss = classification_loss + 1.0 * bbox_regression_loss + 0.5 * ldm_regression_loss
            loss = classification_loss + 0.15 * bbox_regression_loss + 0.25 * ldm_regression_loss

            loss.backward()
            optimizer.step()

            # Periodic console report as an ASCII table of current losses.
            if iter_num % args.verbose == 0:
                log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % (
                    epoch, args.epochs, iter_num, total_batch)
                table_data = [['loss name', 'value'],
                              ['total_loss', str(loss.item())],
                              [
                                  'classification',
                                  str(classification_loss.item())
                              ], ['bbox',
                                  str(bbox_regression_loss.item())],
                              ['landmarks',
                               str(ldm_regression_loss.item())]]
                table = AsciiTable(table_data)
                log_str += table.table
                print(log_str)
                iteration += 1

        # Eval
        if epoch % args.eval_step == 0:
            # Append the last batch's losses to the text log.
            with open("aaa.txt", 'a') as f:
                f.write('-------- RetinaFace Pytorch --------' + '\n')
                f.write('Evaluating epoch {}'.format(epoch) + '\n')
                f.write('total_loss:' + str(loss.item()) + '\n')
                f.write('classification' + str(classification_loss.item()) +
                        '\n')
                f.write('bbox' + str(bbox_regression_loss.item()) + '\n')
                f.write('landmarks' + str(ldm_regression_loss.item()) + '\n')

                f.close()
            print('-------- RetinaFace Pytorch --------')
            print('Evaluating epoch {}'.format(epoch))
            recall, precision, landmakr, miss = eval_widerface.evaluate(
                dataloader_val, retinaface)
            print('Recall:', recall)
            print('Precision:', precision)
            print("landmark: ", str(landmakr))
            print("miss: " + str(miss))

            with open("aaa.txt", 'a') as f:
                f.write('-------- RetinaFace Pytorch --------(not pretrain)' +
                        '\n')
                f.write('Evaluating epoch {}'.format(epoch) + '\n')
                f.write('Recall:' + str(recall) + '\n')
                f.write('Precision:' + str(precision) + '\n')
                f.write("landmark: " + str(landmakr) + '\n')
                f.write("miss: " + str(miss) + '\n')
                f.close()
        # Save model
        if (epoch) % args.save_step == 0:
            torch.save(
                retinaface.state_dict(), args.save_path +
                '/stage_5_68_full_model_epoch_{}.pt'.format(epoch + 1))
Exemplo n.º 11
0
def main():
    """Run RetinaFace on one image; save and show the annotated result.

    The image is padded to a square, resized to the 640x640 network input,
    and the detections (boxes + 5-point landmarks) are drawn on it. The
    result is written to ``args.save_path/<image name>`` and displayed.
    """
    args = get_args()

    # RetinaFace on a torchvision FPN over layer2..layer4.
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # The checkpoint was saved from a DataParallel wrapper: strip the
    # leading "module." prefix (k[7:]) and keep only keys this model knows.
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # Read image, pad to square, resize to the fixed 640x640 input.
    img = skimage.io.imread(args.image_path)
    img = torch.from_numpy(img)
    img = img.permute(2, 0, 1)  # HWC -> CHW
    padded_img, _ = pad_to_square(img, 0)
    resized_img = resize(padded_img.float(), (640, 640))
    input_img = resized_img.unsqueeze(0).cuda()
    picked_boxes, picked_landmarks = eval_widerface.get_detections(
        input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

    # Back to uint8 HWC; skimage loaded RGB, swap channels for OpenCV.
    # (The original also had a no-op `np_img.astype(int)` whose result
    # was discarded -- removed.)
    np_img = resized_img.cpu().permute(1, 2, 0).numpy()
    img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

    # One distinct colour per landmark point (presumably eyes, nose,
    # mouth corners -- TODO confirm landmark order).
    landmark_colors = [(0, 0, 255), (0, 255, 0), (255, 0, 0),
                       (0, 255, 255), (255, 255, 0)]
    for j, boxes in enumerate(picked_boxes):
        if boxes is None:
            continue
        for box, landmark in zip(boxes, picked_landmarks[j]):
            cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]),
                          (0, 0, 255),
                          thickness=2)
            for i, color in enumerate(landmark_colors):
                cv2.circle(img, (landmark[2 * i], landmark[2 * i + 1]),
                           radius=1,
                           color=color,
                           thickness=2)

    image_name = args.image_path.split('/')[-1]
    save_path = os.path.join(args.save_path, image_name)
    cv2.imwrite(save_path, img)
    cv2.imshow('RetinaFace-Pytorch', img)
    cv2.waitKey()
Exemplo n.º 12
0
def main():
    """Pretrain RetinaFace on a random 70/30 train/val split.

    Resumes from a local checkpoint, trains for args.epochs, prints an
    ASCII loss table every args.verbose batches, evaluates every
    args.eval_step epochs (appending to "bbb.txt") and checkpoints every
    args.save_step epochs.
    """
    args = get_args()
    # Ensure output and log directories exist.
    if not os.path.exists(args.save_path):
        os.mkdir(args.save_path)
    log_path = os.path.join(args.save_path,'log')
    if not os.path.exists(log_path):
        os.mkdir(log_path)

    # # writer = SummaryWriter(log_dir=log_path)

    dataset_train = TrainDataset(transform=transforms.Compose([Rotate(),Resizer(),Color()]))
    # 70% of the (augmented) dataset for training, the rest for validation.
    len_train_set = int(len(dataset_train) * 0.7)
    len_val_set   = len(dataset_train) - len_train_set

    train_set, val_set = random_split(dataset_train, [len_train_set, len_val_set])
    dataloader_train = DataLoader(train_set, num_workers=8, batch_size=args.batch, collate_fn=collater,shuffle=True)
    dataloader_val = DataLoader(val_set, num_workers=8, batch_size=args.batch, collate_fn=collater)

    total_batch = len(dataloader_train)

    # Create torchvision model
    return_layers = {'layer2':1,'layer3':2,'layer4':3}
    retinaface = torchvision_model.create_retinaface(return_layers)
    retinaface = retinaface.cuda()

    base_lr=1e-4
    lr = base_lr
    optimizer = optim.Adam(retinaface.parameters(), lr=lr)

    retinaface = torch.nn.DataParallel(retinaface).cuda()
    retinaface.training = True
    # Resume from a local checkpoint (DataParallel-prefixed state dict).
    # retinaface.load_state_dict(torch.load("./pretrained.torch"))
    retinaface.load_state_dict(torch.load("./out/mnas_epoch__ori111124.pt"))


    # Cosine-annealed learning-rate curve over args.epochs.
    # NOTE(review): lr_cos is evaluated each epoch below but never written
    # into optimizer.param_groups, so Adam keeps base_lr -- confirm.
    lr_cos = lambda n: 0.5 * (1 + np.cos((n) / (args.epochs) * np.pi)) * base_lr
    # optimizer = optim.SGD(retinaface.parameters(), lr=1e-2, momentum=0.9, weight_decay=0.0005)
    # scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    # scheduler  = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
    #scheduler  = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10,30,60], gamma=0.1)
    print('Start to train.')

    # ####
    # print("pretrained")
    # recall, precision, landmakr,miss= eval_widerface.evaluate(dataloader_val,retinaface)
    # print('Recall:',recall)
    # print('Precision:',precision)
    # print("landmark: ",str(landmakr))
    # print("miss: "+ str(miss))
    # sdfsdfsdf

    # ###



    epoch_loss = []
    iteration = 0

    for epoch in range(args.epochs):
        lr=lr_cos(epoch)
        print("Current lr is {}".format(lr))
        retinaface.train()
        #print('Current learning rate:',scheduler.get_lr()[0])
        # retinaface.module.freeze_bn()
        # retinaface.module.freeze_first_layer()

        # Training
        for iter_num,data in enumerate(dataloader_train):
            optimizer.zero_grad()
            classification_loss, bbox_regression_loss,ldm_regression_loss = retinaface([data['img'].cuda().float(), data['annot']])
            # Reduce the per-GPU losses returned under DataParallel.
            classification_loss = classification_loss.mean()
            bbox_regression_loss = bbox_regression_loss.mean()
            ldm_regression_loss = ldm_regression_loss.mean()

            # NOTE(review): bbox_regression_loss is computed and logged but
            # excluded from the optimized total -- confirm intended.
            loss = classification_loss+0.1*ldm_regression_loss
            # loss = classification_loss + bbox_regression_loss + ldm_regression_loss

            loss.backward()
            optimizer.step()
            #epoch_loss.append(loss.item())

            # Periodic console report as an ASCII table of current losses.
            if iter_num % args.verbose == 0:
                log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % (epoch, args.epochs, iter_num, total_batch)
                table_data = [
                    ['loss name','value'],
                    ['total_loss',str(loss.item())],
                    ['classification',str(classification_loss.item())],
                    ['bbox',str(bbox_regression_loss.item())],
                    ['landmarks',str(ldm_regression_loss.item())]
                    ]
                table = AsciiTable(table_data)
                #table = SingleTable(table_data)
                #table = DoubleTable(table_data)
                log_str +=table.table
                print(log_str)
                # write the log to tensorboard
                # writer.add_scalar('losses:',loss.item(),iteration*args.verbose)
                # writer.add_scalar('class losses:',classification_loss.item(),iteration*args.verbose)
                # writer.add_scalar('box losses:',bbox_regression_loss.item(),iteration*args.verbose)
                # writer.add_scalar('landmark losses:',ldm_regression_loss.item(),iteration*args.verbose)
                iteration +=1

        #scheduler.step()
        #scheduler.step(np.mean(epoch_loss))	

        # Eval
        if epoch % args.eval_step == 0:
            print('-------- RetinaFace Pytorch --------')
            print ('Evaluating epoch {}'.format(epoch))
            recall, precision, landmakr,miss= eval_widerface.evaluate(dataloader_val,retinaface)
            print('Recall:',recall)
            print('Precision:',precision)
            print("landmark: ",str(landmakr))
            print("miss: "+ str(miss))
            with open("bbb.txt", 'a') as f:
                f.write('-------- RetinaFace Pytorch --------(pretrain)'+'\n')
                f.write ('Evaluating epoch {}'.format(epoch)+'\n')
                f.write('Recall:'+str(recall)+'\n')
                f.write('Precision:'+str(precision)+'\n')
                f.write("landmark: "+str(landmakr)+'\n')
                f.write("miss: "+ str(miss)+'\n')
                f.close()
            # writer.add_scalar('Recall:', recall, epoch*args.eval_step)
            # writer.add_scalar('Precision:', precision, epoch*args.eval_step)

        # Save model
        if (epoch + 1) % args.save_step == 0:
            # NOTE(review): the "+1+5+1112222211100" arithmetic in the file
            # name looks like a debugging artifact -- confirm before reuse.
            torch.save(retinaface.state_dict(), args.save_path + '/pretrain{}.pt'.format(epoch + 1+5+1112222211100))
Exemplo n.º 13
0
def main():
    """Train RetinaFace from scratch with TensorBoard logging.

    Loads train/val label files from args.data_path, trains for
    args.epochs, logs losses to TensorBoard every args.verbose batches,
    evaluates every args.eval_step epochs, and saves a checkpoint every
    args.save_step epochs.
    """
    args = get_args()
    # Ensure output and log directories exist.
    if not os.path.exists(args.save_path):
        os.mkdir(args.save_path)
    log_path = os.path.join(args.save_path, 'log')
    if not os.path.exists(log_path):
        os.mkdir(log_path)

    writer = SummaryWriter(log_dir=log_path)

    data_path = args.data_path
    train_path = os.path.join(data_path, 'train/label.txt')
    val_path = os.path.join(data_path, 'val/label.txt')
    # The training labels are further split into train/test loaders.
    dataloader_train, dataloader_test = load_data(train_path,
                                                  args.batch_size,
                                                  split_train_test=True)
    dataloader_val = load_data(val_path, args.batch_size)

    total_batch = len(dataloader_train)

    # Create torchvision model
    retinaface = torchvision_model.create_retinaface().cuda()
    retinaface = torch.nn.DataParallel(retinaface).cuda()
    retinaface.training = True

    optimizer = optim.Adam(retinaface.parameters(), lr=1e-3)

    print('Start to train.')

    epoch_loss = []
    iteration = 0

    for epoch in range(args.epochs):
        retinaface.train()

        # Training
        for iter_num, data in enumerate(dataloader_train):
            optimizer.zero_grad()
            classification_loss, bbox_regression_loss, ldm_regression_loss = retinaface(
                [data['img'].cuda().float(), data['annot']])
            # Reduce the per-GPU losses returned under DataParallel.
            classification_loss = classification_loss.mean()
            bbox_regression_loss = bbox_regression_loss.mean()
            ldm_regression_loss = ldm_regression_loss.mean()

            # loss = classification_loss + 1.0 * bbox_regression_loss + 0.5 * ldm_regression_loss
            loss = classification_loss + bbox_regression_loss + 0.5 * ldm_regression_loss

            loss.backward()
            optimizer.step()

            # Periodic console + TensorBoard report of the current losses.
            if iter_num % args.verbose == 0:
                log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % (
                    epoch, args.epochs, iter_num, total_batch)
                table_data = [['loss name', 'value'],
                              ['total_loss', str(loss.item())],
                              [
                                  'classification',
                                  str(classification_loss.item())
                              ], ['bbox',
                                  str(bbox_regression_loss.item())],
                              ['landmarks',
                               str(ldm_regression_loss.item())]]
                table = AsciiTable(table_data)
                log_str += table.table
                print("train loses:")
                print(log_str)

                # write the log to tensorboard
                writer.add_scalar('losses:', loss.item(),
                                  iteration * args.verbose)
                writer.add_scalar('class losses:', classification_loss.item(),
                                  iteration * args.verbose)
                writer.add_scalar('box losses:', bbox_regression_loss.item(),
                                  iteration * args.verbose)
                writer.add_scalar('landmark losses:',
                                  ldm_regression_loss.item(),
                                  iteration * args.verbose)
                iteration += 1
                # NOTE(review): running validation on every verbose tick is
                # expensive -- confirm this cadence is intended.
                validate(dataloader_test, retinaface)

        # Eval
        if epoch % args.eval_step == 0:
            print('-------- RetinaFace --------')
            print('Evaluating epoch {}'.format(epoch))
            recall, precision = eval_widerface.evaluate(
                dataloader_val, retinaface)
            print('Recall:', recall)
            print('Precision:', precision)

            writer.add_scalar('Recall:', recall, epoch * args.eval_step)
            writer.add_scalar('Precision:', precision, epoch * args.eval_step)

        # Save model
        if (epoch + 1) % args.save_step == 0:
            torch.save(retinaface.state_dict(),
                       args.save_path + '/model_epoch_{}.pt'.format(epoch + 1))

    writer.close()
def main():
    """Live webcam demo: detect faces and 5-point landmarks per frame.

    Runs RetinaFace on frames grabbed from camera 0 and shows the
    annotated stream until 'q' is pressed or the camera stops.

    Bug fixed: the original opened the camera and then spun in
    ``while True: rval, img_raw = vc.read()`` with no break and without
    ever using the frame, so the detection code below it was unreachable.
    The frame loop now actually feeds the detector.
    """
    args = get_args()
    # RetinaFace on a torchvision FPN over layer2..layer4.
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # The checkpoint was saved from a DataParallel wrapper: strip the
    # leading "module." prefix (k[7:]) and keep only keys this model knows.
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # One distinct colour per landmark point (presumably eyes, nose,
    # mouth corners -- TODO confirm landmark order).
    landmark_colors = [(0, 0, 255), (0, 255, 0), (255, 0, 0),
                       (0, 255, 255), (255, 255, 0)]

    vc = cv2.VideoCapture(0)
    while True:  # loop over webcam frames
        rval, img_raw = vc.read()
        if not rval:
            break  # camera unavailable or stream ended

        img = torch.from_numpy(img_raw)
        img = img.permute(2, 0, 1)  # HWC -> CHW

        # Optional downscale for speed.
        if not args.scale == 1.0:
            size1 = int(img.shape[1] / args.scale)
            size2 = int(img.shape[2] / args.scale)
            img = resize(img.float(), (size1, size2))

        input_img = img.unsqueeze(0).float().cuda()
        picked_boxes, picked_landmarks = eval_widerface.get_detections(
            input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

        # cv2 delivers BGR frames, so draw and show without conversion.
        img = img.cpu().permute(1, 2, 0).numpy().astype(np.uint8)

        for j, boxes in enumerate(picked_boxes):
            if boxes is None:
                continue
            for box, landmark in zip(boxes, picked_landmarks[j]):
                cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]),
                              (0, 0, 255),
                              thickness=2)
                for i, color in enumerate(landmark_colors):
                    cv2.circle(img, (landmark[2 * i], landmark[2 * i + 1]),
                               radius=1,
                               color=color,
                               thickness=2)

        cv2.imshow('RetinaFace-Pytorch', img)
        if cv2.waitKey(1) == ord('q'):
            break

    vc.release()
    cv2.destroyAllWindows()
Exemplo n.º 15
0
def main():
    """Run RetinaFace detection over a fixed range of CelebA images.

    Loads the trained weights from ``args.model_path``, then for each image id
    118400..118499 resizes the image to the network's 320x320 input, detects
    faces and landmarks, draws them, and writes the annotated result to
    ``RetinaFace-Pytorch<qq>.jpg``. Prints total wall-clock time at the end.
    """
    import time  # local import: only needed for the timing below

    args = get_args()

    # Create torchvision-backbone RetinaFace (FPN taps on layers 2-4).
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained weights. Checkpoint keys carry a 7-char 'module.' prefix
    # (saved from DataParallel), stripped here via k[7:].
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace.cuda()
    # Fix: inference mode was missing — without eval(), BatchNorm/Dropout stay
    # in training mode and detections are wrong (siblings in this file set it).
    RetinaFace.eval()

    start = time.time()
    for qq in range(400, 500):
        # NOTE(review): hardcoded dataset path — consider promoting to an arg.
        img = skimage.io.imread(
            "/versa/elvishelvis/RetinaFace_Pytorch/CelebA/Img/img_celeba.7z/img_celeba/118{}.jpg"
            .format(str(qq)))
        print(img.shape)
        # HWC uint8 -> CHW float tensor, resized to the 320x320 network input.
        img = torch.from_numpy(img)
        img = img.permute(2, 0, 1)
        resized_img = resize(img.float(), (320, 320))
        input_img = resized_img.unsqueeze(0).cuda()
        picked_boxes, picked_landmarks = eval_widerface.get_detections(
            input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

        # Back to HWC uint8 for OpenCV drawing. (The original also had a
        # discarded `np_img.astype(int)` no-op, removed here.)
        np_img = resized_img.cpu().permute(1, 2, 0).numpy()
        img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

        for j, boxes in enumerate(picked_boxes):
            if boxes is None:
                continue
            for box, landmark in zip(boxes, picked_landmarks[j]):
                # Fix: cast coordinates to int — OpenCV point arguments
                # reject tensor/float values in recent versions.
                x1, y1, x2, y2 = (int(v) for v in box[:4])
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255),
                              thickness=2)
                # Landmarks come as 5 (x, y) pairs flattened to 10 values.
                for i in range(0, 10, 2):
                    cv2.circle(img,
                               (int(landmark[i]), int(landmark[i + 1])),
                               radius=1,
                               color=(0, 0, 255),
                               thickness=2)

        # Removed: unused `image_name`/`save_path` (computed but never used;
        # output always went to the hardcoded name below).
        cv2.imwrite('RetinaFace-Pytorch{}.jpg'.format(qq),
                    cv2.resize(img, (640, 640)))
    print(time.time() - start)
# Exemplo n.º 16 (scraped example separator; original vote count: 0)
def detect_img(img):
    """Detect faces in a BGR image and return it annotated in place.

    Args:
        img: HWC uint8 image in OpenCV BGR order.

    Returns:
        The image (downscaled by ``args.scale`` when it is not 1.0), in BGR,
        with a magenta rectangle, five colored landmark dots, and the
        confidence score drawn for every detected face.
    """
    args = get_args()

    # Create torchvision-backbone RetinaFace (FPN taps on layers 2-4).
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained weights. Checkpoint keys carry a 7-char 'module.' prefix
    # (saved from DataParallel), stripped here via k[7:].
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)

    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # BGR HWC uint8 -> RGB CHW tensor.
    img = torch.from_numpy(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    img = img.permute(2, 0, 1)

    # Optional downscale by args.scale before inference.
    if args.scale != 1.0:
        size1 = int(img.shape[1] / args.scale)
        size2 = int(img.shape[2] / args.scale)
        img = resize(img.float(), (size1, size2))

    input_img = img.unsqueeze(0).float().cuda()
    picked_boxes, picked_landmarks, picked_scores = eval_widerface.get_detections(
        input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

    # Back to HWC uint8 BGR for OpenCV drawing. (The original also had a
    # discarded `np_img.astype(int)` no-op, removed here.)
    np_img = img.cpu().permute(1, 2, 0).numpy()
    img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

    font = cv2.FONT_HERSHEY_SIMPLEX
    # One distinct color per landmark point (5 points, flattened to 10 coords).
    landmark_colors = ((0, 0, 255), (0, 255, 0), (255, 0, 0),
                       (0, 255, 255), (255, 255, 0))

    for j, boxes in enumerate(picked_boxes):
        if boxes is None:
            continue
        for box, landmark, score in zip(boxes, picked_landmarks[j],
                                        picked_scores[j]):
            # Fix: cast coordinates to int — OpenCV point arguments reject
            # tensor/float values in recent versions.
            x1, y1, x2, y2 = (int(v) for v in box[:4])
            cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255),
                          thickness=2)
            for i, color in enumerate(landmark_colors):
                cv2.circle(img,
                           (int(landmark[2 * i]), int(landmark[2 * i + 1])),
                           radius=1,
                           color=color,
                           thickness=2)
            # (Removed three dead triple-quoted string literals that held
            # experimental drawing code — they were no-op expressions.)
            cv2.putText(img,
                        text=str(score.item())[:5],
                        org=(x1, y1),
                        fontFace=font,
                        fontScale=0.5,
                        thickness=1,
                        lineType=cv2.LINE_AA,
                        color=(255, 255, 255))

    return img