def img_crop_face(root_, pathinfo, mode="train", confidence_threshold=0.05, top_k=10):
    # crop detected faces and save them under root_/<mode>,
    # resized to 112x112 for ArcFace training
    # e.g. source root: "/aidata/dataset/faces/CASIA-FaceV5/train"
    #      root_ = "/aidata/dataset/faces/CASIA-FaceV5_Crop"
    resize = 1
    os.makedirs(os.path.join(root_, mode), exist_ok=True)
    for key, imgs in pathinfo.items():
        newdir = os.path.join(root_, mode, os.path.split(key)[1])
        os.makedirs(newdir, exist_ok=True)
        for img_p in imgs:
            img_raw = cv2.imread(os.path.join(key, img_p), cv2.IMREAD_COLOR)
            img = np.float32(img_raw)
            im_height, im_width, _ = img.shape
            scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
            img -= (104, 117, 123)
            img = img.transpose(2, 0, 1)
            img = torch.from_numpy(img).unsqueeze(0)
            img = img.to(device)
            scale = scale.to(device)

            loc, conf = net(img)
            priorbox = PriorBox(cfg, image_size=(im_height, im_width))
            priors = priorbox.forward()
            priors = priors.to(device)
            prior_data = priors.data
            boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
            boxes = boxes * scale / resize
            boxes = boxes.cpu().numpy()
            scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

            # ignore low scores
            inds = np.where(scores > confidence_threshold)[0]
            boxes = boxes[inds]
            scores = scores[inds]

            # keep top-K before NMS
            order = scores.argsort()[::-1][:top_k]
            boxes = boxes[order]
            scores = scores[order]

            # do NMS
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
            # keep = py_cpu_nms(dets, args.nms_threshold)
            keep = nms(dets, 0.3, force_cpu=False)    # nms_threshold
            dets = dets[keep, :]

            # keep top-K after NMS
            dets = dets[:5, :]    # keep_top_k

            for inx, b in enumerate(dets):
                if b[4] < 0.7:
                    continue
                b = list(map(int, b))
                # decode() returns corner-format boxes: (x1, y1, x2, y2)
                crop_img = img_raw[b[1]:b[3], b[0]:b[2]]
                crop_resized = cv2.resize(crop_img, (112, 112))
                cv2.imwrite(os.path.join(newdir, '{}_{}.jpg'.format(os.path.splitext(img_p)[0], inx)), crop_resized)
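Every snippet in this collection repeats the same RetinaFace/SSD input preparation: BGR mean subtraction, HWC-to-CHW transpose, and a batch dimension. A minimal helper capturing that shared pattern (the name preprocess_bgr and the standalone form are illustrative, not from the original snippets):

import numpy as np
import torch

def preprocess_bgr(img_raw, device):
    # img_raw: HxWx3 uint8 BGR image (as returned by cv2.imread)
    img = np.float32(img_raw)
    im_height, im_width, _ = img.shape
    # scale maps normalized (x1, y1, x2, y2) boxes back to pixel coordinates
    scale = torch.Tensor([im_width, im_height, im_width, im_height])
    img -= (104, 117, 123)          # BGR channel means
    img = img.transpose(2, 0, 1)    # HWC -> CHW
    img = torch.from_numpy(img).unsqueeze(0)  # add batch dimension
    return img.to(device), scale.to(device), im_height, im_width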
Example #2
def face_detector(frame):
    img_raw = frame.copy()
    img = np.float32(img_raw)
    if resize != 1:
        img = cv2.resize(img,
                         None,
                         None,
                         fx=resize,
                         fy=resize,
                         interpolation=cv2.INTER_LINEAR)
    im_height, im_width, _ = img.shape
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)

    loc, conf, landms = net(img)  # forward pass
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    scale1 = torch.Tensor([
        img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3],
        img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2]
    ])
    scale1 = scale1.to(device)
    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()

    # ignore low scores
    inds = np.where(scores > CONFIDENCE)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:5000]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = py_cpu_nms(dets, NMS_THRESHOLD)
    # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
    dets = dets[keep, :]
    landms = landms[keep]

    # keep top-K after NMS
    dets = dets[:750, :]
    landms = landms[:750, :]

    dets = np.concatenate((dets, landms), axis=1)

    bboxs = []
    for b in dets:
        if b[4] < VIZ_THRESHOLD:
            continue
        b = list(map(int, b))

        margin = 10

        x1, y1, x2, y2 = b[0], b[1], b[2], b[3]

        img_h, img_w, _ = frame.shape
        w = x2 - x1
        h = y2 - y1
        margin = int(min(w, h) * margin / 100)
        x_a = x1 - margin
        y_a = y1 - margin
        x_b = x1 + w + margin
        y_b = y1 + h + margin
        if x_a < 0:
            x_b = min(x_b - x_a, img_w - 1)
            x_a = 0
        if y_a < 0:
            y_b = min(y_b - y_a, img_h - 1)
            y_a = 0
        if x_b > img_w:
            x_a = max(x_a - (x_b - img_w), 0)
            x_b = img_w
        if y_b > img_h:
            y_a = max(y_a - (y_b - img_h), 0)
            y_b = img_h

        name = ""
        print(name)
        face = frame[y_a:y_b, x_a:x_b]
        rgb = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
        encodings = face_recognition.face_encodings(rgb,
                                                    [(y_a, x_b, y_b, x_a)])
        matches = face_recognition.compare_faces(face_data["encodings"],
                                                 encodings[0],
                                                 tolerance=0.55)
        print(matches)
        if True in matches:
            matchedIdxs = [i for (i, b) in enumerate(matches) if b]
            counts = {}

            for i in matchedIdxs:
                name = face_data["names"][i]
                counts[name] = counts.get(name, 0) + 1

            name = max(counts, key=counts.get)
            print("name1", name)
        cv2.putText(img_raw, name, (x_a + 10, y_a), cv2.FONT_HERSHEY_SIMPLEX,
                    1, (255, 0, 255), 1, cv2.LINE_AA)
        cv2.rectangle(img_raw, (x_a, y_a), (x_b, y_b), (255, 0, 0), 1)
        bboxs.append([x_a, y_a, x_b, y_b])

    return img_raw, bboxs
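These snippets call py_cpu_nms without defining it. A standard NumPy greedy-IoU version matching how it is used here (dets is Nx5: x1, y1, x2, y2, score), offered as a sketch of the assumed implementation:

import numpy as np

def py_cpu_nms(dets, thresh):
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # indices sorted by descending score
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the top-scoring box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # drop boxes overlapping the kept box by more than thresh
        order = order[np.where(ovr <= thresh)[0] + 1]
    return keep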
Example #3
def main():
    global args
    global minmum_loss
    args.gpu = 0
    args.world_size = 1

    if args.distributed:
        args.gpu = args.local_rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()

    args.total_batch_size = args.world_size * args.batch_size

    ## DATA loading code
    if args.dataset == 'COCO':
        cfg = coco
        if not os.path.exists(cfg['coco_root']):
            parser.error('Must specify dataset_root if specifying dataset')
        dataset = COCODetection(root=cfg['coco_root'],
                                transform=SSDAugmentation(
                                    cfg['min_dim'], MEANS))
    elif args.dataset == 'VOC':
        cfg = voc
        dataset = VOCDetection(root=cfg['voc_root'],
                               transform=SSDAugmentation(
                                   cfg['min_dim'], MEANS))
    print('Training SSD on:', dataset.name)
    print('Loading the dataset...')
    train_loader = data.DataLoader(dataset,
                                   args.batch_size,
                                   num_workers=args.num_workers,
                                   shuffle=True,
                                   collate_fn=detection_collate,
                                   pin_memory=True)

    print("Build ssd network")
    model = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
    if args.pretrained:
        vgg_weights = torch.load(args.save_folder + args.basenet)
        print('Loading base network...')
        model.vgg.load_state_dict(vgg_weights)

    model = model.cuda()
    # optimizer and loss function
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
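    # MultiBoxLoss positional args below, assuming the ssd.pytorch signature:
    # (num_classes, overlap_thresh, prior_for_matching, bkg_label, neg_mining,
    #  neg_pos ratio, neg_overlap, encode_target, use_gpu)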
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, True)

    ## get the priorbox of ssd
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.cuda()

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(
                args.resume,
                map_location=lambda storage, loc: storage.cuda(args.gpu))
            args.start_epoch = checkpoint['epoch']
            minmum_loss = checkpoint['minmum_loss']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        model.extras.apply(weights_init)
        model.loc.apply(weights_init)
        model.conf.apply(weights_init)

    print('Using the specified args:')
    print(args)
    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        end = time.time()
        loss = train(train_loader, model, priors, criterion, optimizer, epoch)
        # remember best prec@1 and save checkpoint
        if args.local_rank == 0:
            is_best = loss < minmum_loss
            minmum_loss = min(loss, minmum_loss)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'minmum_loss': minmum_loss,
                    'optimizer': optimizer.state_dict(),
                }, is_best, epoch)
        epoch_time = time.time() - end
        print('Epoch %s time cost %f' % (epoch, epoch_time))
Example #4
def train(opt, train_dict, device, tb_writer=None):
    log_dir = Path(tb_writer.log_dir) if tb_writer else Path(
        train_dict['logdir']) / 'logs'
    wdir = str(log_dir / 'weights') + os.sep
    os.makedirs(wdir, exist_ok=True)
    last = wdir + 'last.pt'
    best = wdir + 'best.pt'
    results_file = 'results.txt'
    with open(log_dir / 'hyp.yaml', 'w') as f:
        yaml.dump(train_dict, f, sort_keys=False)
    with open(log_dir / 'opt.yaml', 'w') as f:
        yaml.dump(vars(opt), f, sort_keys=False)

    # Configure
    cuda = device.type != 'cpu'
    rank = opt.global_rank
    init_seeds(2 + rank)
    train_path = train_dict['train']
    test_path = train_dict['val']
    if not train_dict['pretrain'] or not os.path.exists(train_dict['weights']):
        train_dict['weights'] = last
    model = RetinaFace(train_dict, phase='Train')
    pretrained = False
    if os.path.exists(train_dict['weights']):
        pretrained = True
        logger.info('Loading resume network from ====>{}'.format(
            train_dict['weights']))
        state_dict = torch.load(train_dict['weights'], map_location=device)
        # create new OrderedDict that does not contain `module.`
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict['model'].items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        model.load_state_dict(new_state_dict)

    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in model.named_parameters():
        v.requires_grad = True
        if '.bias' in k:
            pg2.append(v)  # biases
        elif '.weight' in k and '.bn' not in k:
            pg1.append(v)  # apply weight decay
        else:
            pg0.append(v)  # all else

    if train_dict['adam']:
        optimizer = optim.Adam(pg0,
                               lr=train_dict['lr0'],
                               betas=(train_dict['momentum'],
                                      0.999))  # adjust beta1 to momentum
    else:
        optimizer = optim.SGD(pg0,
                              lr=train_dict['lr0'],
                              momentum=train_dict['momentum'],
                              nesterov=True)
    optimizer.add_param_group({
        'params': pg1,
        'weight_decay': train_dict['weight_decay']
    })  # add pg1 with weight_decay
    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
    logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' %
                (len(pg2), len(pg1), len(pg0)))
    del pg0, pg1, pg2

    epochs = train_dict['epoch']
    lf = lambda x: ((
        (1 + math.cos(x * math.pi / epochs)) / 2)**1.0) * 0.8 + 0.2  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    plot_lr_scheduler(optimizer, scheduler, epochs)

    # Resume
    start_epoch, best_fitness = 0, 0.0
    if pretrained:
        # Optimizer
        if state_dict.get('optimizer') is not None:
            optimizer.load_state_dict(state_dict['optimizer'])
            best_fitness = state_dict['best_fitness']

        # Results
        if state_dict.get('training_results') is not None:
            with open(results_file, 'w') as file:
                file.write(state_dict['training_results'])  # write results.txt

        # Epochs
        start_epoch = state_dict['epoch'] + 1
        if epochs < start_epoch:
            logger.info(
                '%s has been trained for %g epochs. Fine-tuning for %g additional epochs.'
                % (train_dict['weights'], state_dict['epoch'], epochs))
            epochs += state_dict['epoch']  # finetune additional epochs

        del state_dict

    if train_dict['sync_bn'] and cuda and rank != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        logger.info('Using SyncBatchNorm()')

    # Exponential moving average
    ema = ModelEMA(model) if rank in [-1, 0] else None

    # ddp
    if cuda and rank != -1:
        model = DDP(model,
                    device_ids=[opt.local_rank],
                    output_device=(opt.local_rank))

    # Trainloader
    batch_size = train_dict['batch_size']
    image_size = train_dict['image_size']
    # dataloader, dataset = create_dataloader(train_path,image_size, batch_size, opt, hyp=train_dict, augment=True,
    #                                         rect=opt.rect, rank=rank,
    #                                         world_size=opt.world_size, workers=train_dict['workers'])
    rgb_mean = (104, 117, 123)  # bgr order
    dataset = WiderFaceDetection(train_path, preproc(image_size, rgb_mean))
    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             num_workers=8,
                                             sampler=sampler,
                                             pin_memory=True,
                                             collate_fn=detection_collate)

    criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False)
    priorbox = PriorBox(train_dict, image_size=(image_size, image_size))
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.cuda()
    for epoch in range(start_epoch, epochs):
        if rank != -1:
            dataloader.sampler.set_epoch(epoch)
        pbar = enumerate(dataloader)
        if rank in [-1, 0]:
            pbar = tqdm(pbar)  # progress bar
        optimizer.zero_grad()
        for i, (images, targets) in pbar:  # batch ------------------------------
            load_t0 = time.time()
            images = images.cuda()
            targets = [anno.cuda() for anno in targets]
            with amp.autocast(enabled=cuda):
                out = model(images)
                loss_l, loss_c, loss_landm = criterion(out, priors, targets)
                loss = train_dict['loc_weight'] * loss_l + loss_c + loss_landm
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            batch_time = time.time() - load_t0
            if rank in [-1, 0]:
                print(
                    'Epoch:{}/{} || Iter: {}/{} || Loc: {:.4f} Cla: {:.4f} Landm: {:.4f} || LR: {:.8f} || Batchtime: {:.4f} s'
                    .format(epoch, epochs, i + 1, len(dataloader),
                            loss_l.item(), loss_c.item(), loss_landm.item(),
                            scheduler.get_last_lr()[-1], batch_time))
        scheduler.step()
        if rank in [-1, 0]:
            torch.save(model.state_dict(), last)
Example #5
def face_detector(frame):
    img_raw = frame.copy()
    img = np.float32(img_raw)
    if resize != 1:
        img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR)
    im_height, im_width, _ = img.shape
    scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)

    loc, conf, landms = net(img)  # forward pass
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                            img.shape[3], img.shape[2]])
    scale1 = scale1.to(device)
    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()

    # ignore low scores
    inds = np.where(scores > CONFIDENCE)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:5000]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, NMS_THRESHOLD)
    # keep = nms(dets, args.nms_threshold, force_cpu=args.cpu)
    dets = dets[keep, :]
    landms = landms[keep]

    # keep top-K after NMS
    dets = dets[:750, :]
    landms = landms[:750, :]

    dets = np.concatenate((dets, landms), axis=1)

    bboxs = []
    for b in dets:
        if b[4] < VIZ_THRESHOLD:
            continue
        b = list(map(int, b))
        
        margin = 10

        x1, y1, x2, y2 = b[0], b[1], b[2], b[3]

        img_h, img_w, _ = frame.shape
        w = x2 - x1
        h = y2 - y1
        margin = int(min(w, h) * margin / 100)
        x_a = x1 - margin
        y_a = y1 - margin
        x_b = x1 + w + margin
        y_b = y1 + h + margin
        if x_a < 0:
            x_b = min(x_b - x_a, img_w - 1)
            x_a = 0
        if y_a < 0:
            y_b = min(y_b - y_a, img_h - 1)
            y_a = 0
        if x_b > img_w:
            x_a = max(x_a - (x_b - img_w), 0)
            x_b = img_w
        if y_b > img_h:
            y_a = max(y_a - (y_b - img_h), 0)
            y_b = img_h

        cv2.rectangle(img_raw, (x_a, y_a), (x_b, y_b), (255, 0, 255), 1)
        bboxs.append([x_a, y_a, x_b, y_b])

    return img_raw, bboxs
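decode and decode_landm are likewise imported everywhere without being shown. A sketch assuming the standard SSD/RetinaFace box decoding with the usual center/size variances (cfg['variance'] is typically [0.1, 0.2]):

import torch

def decode(loc, priors, variances):
    # priors are (cx, cy, w, h); loc are the predicted offsets
    boxes = torch.cat((
        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
        priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), dim=1)
    boxes[:, :2] -= boxes[:, 2:] / 2   # (cx, cy, w, h) -> (x1, y1, w, h)
    boxes[:, 2:] += boxes[:, :2]       # -> (x1, y1, x2, y2)
    return boxes

def decode_landm(pre, priors, variances):
    # the same center shift, applied to each of the five landmark points
    return torch.cat(tuple(
        priors[:, :2] + pre[:, 2 * i:2 * i + 2] * variances[0] * priors[:, 2:]
        for i in range(5)), dim=1)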
Example #6
def train():

    net = RetinaFace(cfg=cfg)
    logger.info("Printing net...")
    logger.info(net)

    if args.resume_net is not None:
        logger.info('Loading resume network...')
        state_dict = torch.load(args.resume_net)
        # create new OrderedDict that does not contain `module.`
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == 'module.':
                name = k[7:] # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        net.load_state_dict(new_state_dict)

    if num_gpu > 1 and gpu_train:
        net = torch.nn.DataParallel(net).cuda()
    else:
        net = net.cuda()

    cudnn.benchmark = True

    priorbox = PriorBox(cfg, image_size=(img_dim, img_dim))
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.cuda()

    net.train()
    epoch = 0 + args.resume_epoch
    logger.info('Loading Dataset...')

    trainset = WiderFaceDetection(training_dataset, preproc=train_preproc(img_dim, rgb_mean), mode='train')
    validset = WiderFaceDetection(training_dataset, preproc=valid_preproc(img_dim, rgb_mean), mode='valid')
    # trainset = WiderFaceDetection(training_dataset, transformers=train_transformers(img_dim), mode='train')
    # validset = WiderFaceDetection(training_dataset, transformers=valid_transformers(img_dim), mode='valid')
    trainloader = data.DataLoader(trainset, batch_size, shuffle=True, num_workers=num_workers, collate_fn=detection_collate)
    validloader = data.DataLoader(validset, batch_size, shuffle=True, num_workers=num_workers, collate_fn=detection_collate)
    logger.info(f'Totally {len(trainset)} training samples and {len(validset)} validating samples.')

    epoch_size = math.ceil(len(trainset) / batch_size)
    max_iter = max_epoch * epoch_size
    logger.info(f'max_epoch: {max_epoch:d} epoch_size: {epoch_size:d}, max_iter: {max_iter:d}')

    # optimizer = optim.SGD(net.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay)
    optimizer = optim.Adam(net.parameters(), lr=initial_lr, weight_decay=weight_decay)
    scheduler = _utils.get_linear_schedule_with_warmup(optimizer, int(0.1 * max_iter), max_iter)
    criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False)

    stepvalues = (cfg['decay1'] * epoch_size, cfg['decay2'] * epoch_size)
    step_index = 0

    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0

    best_loss_val = float('inf')
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            # batch_iterator = iter(tqdm(trainloader, total=len(trainloader)))
            batch_iterator = iter(trainloader)
            # if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > cfg['decay1']):
            #     torch.save(net.state_dict(), save_folder + cfg['name']+ '_epoch_' + str(epoch) + '.pth')
            epoch += 1
            torch.cuda.empty_cache()

        if (valid_steps > 0) and (iteration > 0) and (iteration % valid_steps == 0):
            net.eval()
            # validation
            loss_l_val = 0.
            loss_c_val = 0.
            loss_landm_val = 0.
            loss_val = 0.
            # for val_no, (images, targets) in tqdm(enumerate(validloader), total=len(validloader)):
            for val_no, (images, targets) in enumerate(validloader):
                # load data
                images = images.cuda()
                targets = [anno.cuda() for anno in targets]
                # forward
                with torch.no_grad():
                    out = net(images)
                    loss_l, loss_c, loss_landm = criterion(out, priors, targets)
                    loss = cfg['loc_weight'] * loss_l + loss_c + loss_landm
                loss_l_val += loss_l.item()
                loss_c_val += loss_c.item()
                loss_landm_val += loss_landm.item()
                loss_val += loss.item()
            loss_l_val /= len(validloader)
            loss_c_val /= len(validloader)
            loss_landm_val /= len(validloader)
            loss_val /= len(validloader)
            logger.info('[Validating] Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Total: {:.4f} Loc: {:.4f} Cla: {:.4f} Landm: {:.4f}'
                .format(epoch, max_epoch, (iteration % epoch_size) + 1,
                epoch_size, iteration + 1, max_iter, 
                loss_val, loss_l_val, loss_c_val, loss_landm_val))
            if loss_val < best_loss_val:
                best_loss_val = loss_val
                pth = os.path.join(save_folder, cfg['name'] + '_iter_' + str(iteration) + f'_{loss_val:.4f}_' + '.pth')
                torch.save(net.state_dict(), pth)
                logger.info(f'Best validating loss: {best_loss_val:.4f}, model saved as {pth:s}')
            net.train()

        load_t0 = time.time()
        # if iteration in stepvalues:
        #     step_index += 1
        # lr = adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size)

        # load train data
        images, targets = next(batch_iterator)
        images = images.cuda()
        targets = [anno.cuda() for anno in targets]

        # forward
        out = net(images)

        # backprop
        optimizer.zero_grad()
        loss_l, loss_c, loss_landm = criterion(out, priors, targets)
        loss = cfg['loc_weight'] * loss_l + loss_c + loss_landm
        loss.backward()
        optimizer.step()
        scheduler.step()
        load_t1 = time.time()
        batch_time = load_t1 - load_t0
        eta = int(batch_time * (max_iter - iteration))
        if iteration % verbose_steps == 0:
            logger.info('[Training] Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Total: {:.4f} Loc: {:.4f} Cla: {:.4f} Landm: {:.4f} || LR: {:.8f} || Batchtime: {:.4f} s || ETA: {}'
                .format(epoch, max_epoch, (iteration % epoch_size) + 1,
                epoch_size, iteration + 1, max_iter, 
                loss.item(), loss_l.item(), loss_c.item(), loss_landm.item(), 
                scheduler.get_last_lr()[-1], batch_time, str(datetime.timedelta(seconds=eta))))
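The snippet's _utils.get_linear_schedule_with_warmup is not shown; a sketch assuming it mirrors the Hugging Face transformers helper of the same name (warm up over the first num_warmup_steps, here 10% of max_iter, then decay linearly to zero):

from torch.optim import lr_scheduler

def get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps):
    def lr_lambda(step):
        if step < num_warmup_steps:
            # linear ramp from 0 to the base learning rate
            return step / max(1, num_warmup_steps)
        # linear decay from the base learning rate down to 0
        return max(0.0, (num_training_steps - step) /
                   max(1, num_training_steps - num_warmup_steps))
    return lr_scheduler.LambdaLR(optimizer, lr_lambda)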
Example #7
def do_annotation_over_video(args, _dir, device, nets, resize, cfg):
    exts = ['mp4', 'avi']
    _dir = _dir.replace('\n', '')

    for video_path in glob.glob(_dir + '/*'):
        video_name = os.path.split(video_path)[-1]
        ext = os.path.splitext(video_name)[-1].split('.')[-1]
        if ext not in exts:
            continue
        video_name_no_ext = os.path.splitext(video_name)[0]
        target_dir = f'auto_annotation_labels/{video_name_no_ext}'
        if os.path.exists(target_dir):
            shutil.rmtree(target_dir)
        os.makedirs(target_dir, exist_ok=True)
        tagfile = os.path.join(target_dir, 'tag.csv')

        if os.path.exists(tagfile):
            os.remove(tagfile)
        cam = cv2.VideoCapture(video_path)
        frm_num = 0
        while True:
            _, img_raw = cam.read()
            if img_raw is None:
                break
            frm_num += 1
            if frm_num % 100 != 0:
                continue
            img_name = video_name + '_' + format(frm_num, '06d') + '.jpg'
            img_raws = [img_raw.copy() for i in range(len(nets))]
            img = np.float32(img_raw)
            im_height, im_width, _ = img.shape
            scale = torch.Tensor(
                [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
            img -= (104, 117, 123)
            img = img.transpose(2, 0, 1)
            img = torch.from_numpy(img).unsqueeze(0)
            img = img.to(device)
            scale = scale.to(device)

            tic = time.time()
            all_boxes = []
            all_scores = []
            drawn_images = []
            for idx, net in enumerate(nets):
                loc, conf, landms = net(img)  # forward pass
                # print ('net forward time: {:.4f}'.format (time.time () - tic))
                priorbox = PriorBox(cfg, image_size=(im_height, im_width))
                priors = priorbox.forward()
                priors = priors.to(device)
                prior_data = priors.data
                boxes = decode(loc.data.squeeze(0), prior_data,
                               cfg['variance'])
                boxes = boxes * scale / resize
                boxes = boxes.cpu().numpy()
                scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
                landms = decode_landm(landms.data.squeeze(0), prior_data,
                                      cfg['variance'])
                scale1 = torch.Tensor([
                    img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                    img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                    img.shape[3], img.shape[2]
                ])
                scale1 = scale1.to(device)
                landms = landms * scale1 / resize
                landms = landms.cpu().numpy()

                # ignore low scores
                inds = np.where(scores > args.confidence_threshold)[0]
                boxes = boxes[inds]
                landms = landms[inds]
                scores = scores[inds]

                # keep top-K before NMS
                order = scores.argsort()[::-1][:args.top_k]
                boxes = boxes[order]
                landms = landms[order]
                scores = scores[order]

                # do NMS
                dets = np.hstack(
                    (boxes, scores[:, np.newaxis])).astype(np.float32,
                                                           copy=False)
                keep = py_cpu_nms(dets, args.nms_threshold)
                dets = dets[keep, :]
                landms = landms[keep]

                boxes = boxes[keep]
                all_boxes.append(
                    np.hstack(
                        [boxes,
                         np.full_like(boxes[:, :1], fill_value=idx)]))
                all_scores.append(scores[keep])

                # keep top-K after NMS
                dets = dets[:args.keep_top_k, :]
                landms = landms[:args.keep_top_k, :]

                dets = np.concatenate((dets, landms), axis=1)

                # show image
                if args.save_image:
                    for b in dets:
                        if b[4] < args.vis_thres:
                            continue
                        text = "{:.4f}".format(b[4])
                        b = list(map(int, b))
                        cv2.rectangle(img_raws[idx], (b[0], b[1]),
                                      (b[2], b[3]), (255, 255, 255), 2)
                        cx = b[0]
                        cy = b[1] + 12
                        # cv2.putText (img_raws[idx], text, (cx, cy),
                        #              cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

                    # landms
                    # cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
                    # cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
                    # cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
                    # cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)

                    # cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
                    # save image
                    if idx == len(nets) - 1:  # last model in list of models
                        # name = os.path.join(os.getcwd (), f"det_results/{img_name}")
                        img_to_draw = np.vstack(img_raws)
                        # cv2.imwrite (name, cv2.resize(img_to_draw, dsize=None, fx=0.5, fy=0.5))

            all_boxes = np.concatenate(all_boxes, axis=0)
            all_scores = np.concatenate(all_scores, axis=0)
            # keep top-K before NMS
            order = all_scores.argsort()[::-1][:args.top_k]
            all_boxes = all_boxes[order]
            all_scores = all_scores[order]

            # do NMS
            dets = np.hstack(
                (all_boxes, all_scores[:, np.newaxis])).astype(np.float32,
                                                               copy=False)
            keep, dont_care = customized_cpu_nms(dets,
                                                 args.nms_threshold,
                                                 total_models=len(nets))
            dets_dc = dets[dont_care, :]
            dets = dets[keep, :]

            # keep top-K after NMS
            dets = dets[:args.keep_top_k, :]
            dets_dc = dets_dc[:args.keep_top_k, :]
            dets = np.delete(dets, [4], axis=1)
            dets_dc = np.delete(dets_dc, [4], axis=1)
            bbox = np.hstack(
                [dets[:, :5],
                 np.full((dets.shape[0], 1), fill_value=4)])
            if bbox.shape[0] < 3:
                continue
            bbox_dc = np.hstack([
                dets_dc[:, :5],
                np.full((dets_dc.shape[0], 1), fill_value=10)
            ])
            bbox = np.vstack([bbox, bbox_dc])
            write_csv(tagfile, bbox, file_name=img_name)
            imgfile = f'auto_annotation_labels/{video_name_no_ext}/{img_name}'
            cv2.imwrite(imgfile, img_raw)
            # show image
            # if args.save_image:
            if 0:
                for b in dets:
                    if b[4] < args.vis_thres:
                        continue
                    text = "{:.4f}".format(b[4])
                    b = list(map(int, b))
                    cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]),
                                  (255, 0, 255), 2)
                    cx = b[0]
                    cy = b[1] + 12
                    cv2.putText(img_raw, text, (cx, cy),
                                cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
                for b in dets_dc:
                    if b[4] < args.vis_thres:
                        continue
                    text = "{:.4f}".format(b[4])
                    b = list(map(int, b))
                    cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]),
                                  (0, 0, 255), 2)
                    cx = b[0]
                    cy = b[1] + 12
                    cv2.putText(img_raw, text, (cx, cy),
                                cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

                # landms
                # cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
                # cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
                # cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
                # cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)

                # cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
                # save image
                name = os.path.join(os.getcwd(), f"det_results/{img_name}")
                img_to_draw = cv2.resize(
                    img_to_draw,
                    dsize=(int(img_raw.shape[1] *
                               (img_raw.shape[0] / img_to_draw.shape[0])),
                           img_raw.shape[0]))
                img_to_draw = np.hstack([img_to_draw, img_raw])
                cv2.imwrite(name, img_to_draw)
Example #8
if num_gpu > 1 and gpu_train:
    net = torch.nn.DataParallel(net, device_ids=list(range(num_gpu)))
    teacher_net = torch.nn.DataParallel(teacher_net,
                                        device_ids=list(range(num_gpu)))

device = torch.device('cuda:0' if gpu_train else 'cpu')
cudnn.benchmark = True
net = net.to(device)
teacher_net = teacher_net.to(device)

optimizer = optim.SGD(net.parameters(),
                      lr=initial_lr,
                      momentum=momentum,
                      weight_decay=weight_decay)
criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False)
tdkd = TDKD()
priorbox = PriorBox(cfg, image_size=(img_dim, img_dim))
with torch.no_grad():
    priors, priors_by_layer = priorbox.forward()
    priors = priors.to(device)


def train():
    net.train()
    teacher_net.eval()
    for param in teacher_net.parameters():
        param.requires_grad = False

    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')

    dataset = WiderFaceDetection(training_dataset, preproc(img_dim, rgb_mean))
Example #9
class SSD(nn.Module):
    """Single Shot Multibox Architecture
    The network is composed of a base VGG network followed by the
    added multibox conv layers.  Each multibox layer branches into
        1) conv2d for class conf scores
        2) conv2d for localization predictions
        3) associated priorbox layer to produce default bounding
           boxes specific to the layer's feature map size.
    See: https://arxiv.org/pdf/1512.02325.pdf for more details.

    Args:
        phase: (string) Can be "test" or "train"
        base: VGG16 layers for input, size of either 300 or 500
        extras: extra layers that feed to multibox loc and conf layers
        head: "multibox head" consists of loc and conf conv layers
    """
    def __init__(self, phase, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        # TODO: implement __call__ in PriorBox
        self.priorbox = PriorBox(v2)

        self.priors = Variable(self.priorbox.forward(), volatile=True)
        self.size = 300

        # SSD network
        self.vgg = nn.ModuleList(base)
        # Layer learns to scale the l2 normalized features from conv4_3
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)

        # fused conv4_3 and conv5_3
        self.conv4_3 = nn.Conv2d(512, 512, 3, 1, 1)
        self.deconv = nn.ConvTranspose2d(512, 512, 2, 2)
        self.conv5_3 = nn.Conv2d(512, 512, 3, 1, 1)
        self.L2Norm5_3 = L2Norm(512, 10)
        self.fused_relu = nn.ReLU(inplace=True)

        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])

        if self.phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)
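            # Detect positional args above, assuming the ssd.pytorch signature:
            # (num_classes, bkg_label=0, top_k=200, conf_thresh=0.01, nms_thresh=0.45)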

    def forward(self, x):
        """Applies network layers and ops on input image(s) x.
        Args:
            x: input image or batch of images. Shape: [batch,3,300,300].
        Return:
            Depending on phase:
            test:
                Variable(tensor) of output class label predictions,
                confidence score, and corresponding location predictions for
                each object detected. Shape: [batch,topk,7]
            train:
                list of concat outputs from:
                    1: confidence layers, Shape: [batch*num_priors,num_classes]
                    2: localization layers, Shape: [batch,num_priors*4]
                    3: priorbox layers, Shape: [2,num_priors*4]
        """
        sources = list()
        loc = list()
        conf = list()

        # Apply vgg up to conv4_3 relu
        # Fused conv4_3,conv5_3
        for k in range(23):
            x = self.vgg[k](x)
        conv4_3 = self.conv4_3(x)
        s4_3 = self.L2Norm(conv4_3)

        for k in range(23, 30):
            x = self.vgg[k](x)
        deconv = self.deconv(x)
        conv5_3 = self.conv5_3(deconv)
        s5_3 = self.L2Norm5_3(conv5_3)

        # print(s4_3.size())
        # print(s5_3.size())
        s = s4_3 + s5_3
        s = self.fused_relu(s)
        sources.append(s)

        # apply vgg up to fc7
        for k in range(30, len(self.vgg)):
            x = self.vgg[k](x)
        sources.append(x)

        # Apply extra layers and cache source layer outputs
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)

        # Apply multibox head to source layers
        for (x, l, c) in zip(sources, self.loc, self.conf):
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())

        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)

        if self.phase == 'test':
            conf_preds = conf.view(-1, self.num_classes)
            conf_preds = self.softmax(conf_preds).view(conf.size(0), -1,
                                                       self.num_classes)
            # TODO: test
            # loc = loc.view(loc.size(0), -1, 4)
            # print(loc.size())
            # print(conf_preds.size())
            # print(self.priors.size())
            output = self.detect(
                loc.view(loc.size(0), -1, 4),  # loc preds
                conf_preds,
                self.priors.type(type(x.data))  # default boxes
            )
        else:
            output = (loc.view(loc.size(0), -1,
                               4), conf.view(conf.size(0), -1,
                                             self.num_classes), self.priors)

        return output

    def load_weights(self, base_file):
        other, ext = os.path.splitext(base_file)
        if ext in ('.pkl', '.pth'):
            print('Loading weights into state dict ...')
            self.load_state_dict(
                torch.load(base_file,
                           map_location=lambda storage, loc: storage))
            print('Finished!')

        else:
            print("Sorry only .pth or .pkl files supported.")

    def load_weights_fused(self, base_file):
        other, ext = os.path.splitext(base_file)
        if ext in ('.pkl', '.pth'):
            print('Loading weights into state dict ...')
            params = torch.load(base_file,
                                map_location=lambda storage, loc: storage)
            own_dict = self.state_dict()
            for k, v in own_dict.items():
                param = params.get(k)
                if param is None:
                    continue
                v.copy_(param)
            print('Finished!')

        else:
            print("Sorry only .pth or .pkl files supported.")

    def load_weights_for_rosd(self, base_file):
        other, ext = os.path.splitext(base_file)
        if ext in ('.pkl', '.pth'):
            print('Loading weights into state dict ...')
            params = torch.load(base_file,
                                map_location=lambda storage, loc: storage)
            own_dict = self.state_dict()
            for k, v in list(own_dict.items())[:-28]:
                param = params.get(k)
                if param is None:
                    continue
                v.copy_(param)
            print('Finished!')

        else:
            print("Sorry only .pth or .pkl files supported.")
Example #10
def test_begin(img_name):
    image_path = os.path.join(args.test_dir, img_name)
    save_path = os.path.join(args.save_dir, img_name)
    # for i in range(100):
    # image_path = "./test_img/11008.jpg"
    img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)

    img = np.float32(img_raw)

    im_height, im_width, _ = img.shape
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)

    tic = time.time()
    loc, conf, landms = net(img)  # forward pass
    print('net forward time: {:.4f}'.format(time.time() - tic))

    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    scale1 = torch.Tensor([
        img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3],
        img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2]
    ])
    scale1 = scale1.to(device)
    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()

    # ignore low scores
    inds = np.where(scores > args.confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:args.top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = py_cpu_nms(dets, args.nms_threshold)
    # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
    dets = dets[keep, :]
    landms = landms[keep]

    # keep top-K after NMS
    dets = dets[:args.keep_top_k, :]
    landms = landms[:args.keep_top_k, :]

    dets = np.concatenate((dets, landms), axis=1)

    # show image
    if args.save_image:
        for b in dets:
            if b[4] < args.vis_thres:
                continue
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))
            cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
            cx = b[0]
            cy = b[1] + 12
            cv2.putText(img_raw, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX, 0.5,
                        (255, 255, 255))

            # landms
            cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
            cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
            cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
            cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
            cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
        # save image

        # save_path = "./save_img/test.jpg"
        cv2.imwrite(save_path, img_raw)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = net.to(device)
torch.backends.cudnn.benchmark = True

# vs = FileVideoStream("/home/mdt/ownCloud/datasets/Face_Recognition/output.avi").start()
vs = FileVideoStream(
    "/home/mdt/Downloads/Captures/ 2020-02-25 14-58-37.mp4").start()
# vs = VideoStream("rtsp://*****:*****@192.168.100.64:554").start()
# vs = VideoStream("rtsp://*****:*****@322nguyentrai.ddns.net:554").start()
# vs = VideoStream("rtsp://*****:*****@118.70.82.46:554").start()
# vs = VideoStream("rtsp://*****:*****@192.168.101.65:555").start()
# vs = VideoStream("rtsp://*****:*****@113.161.36.165:554").start()
fps = FPS().start()
net_inshape = (640, 640)  # h, w
rgb_mean = (104, 117, 123)  # bgr order
priorbox = PriorBox(cfg, image_size=net_inshape)
priors = priorbox.forward()
priors = priors.numpy()
while True:
    frame = vs.read()
    # frame = imutils.rotate_bound(frame, 90)
    if frame is None:
        break
    frame_raw = frame.copy()
    # image_path = "fail.jpg"
    # frame = cv2.imread(image_path, cv2.IMREAD_COLOR)
    # frame_raw = frame.copy()

    h, w = frame.shape[:2]
    d = max(h, w)
    dy = (d - h)
    def detect_faces(self, img, return_best=False):
        """
            Computes a list of faces detected in the input image in the form of a list of bounding-boxes, one per each detected face.
            Arguments:
                img: The image to be input to the RetinaFace model
                return_best: boolean indicating whether to return just the best detection or the complete list of detections
            Returns:
                A list of arrays. Each array contains the image coordinates of the corners of a bounding-box and the score of the detection
                in the form [x1,y1,x2,y2,score], where (x1,y1) are the integer coordinates of the top-left corner of the box and (x2,y2) are
                the coordinates of the bottom-right corner of the box. The score is a floating-point number.
                When return_best is True, the returned list will contain only one bounding-box
        """
        if img is not None:
            try:
                im_height, im_width, _ = img.shape
                scale = torch.Tensor(
                    [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
                img = numpy.float32(img)
                img -= (104, 117, 123)
                img = img.transpose(2, 0, 1)
                img = torch.from_numpy(img).unsqueeze(0)
                img = img.to(self.device)
                scale = scale.to(self.device)

                # note below that the landmarks (3rd returned value) are ignored
                loc, conf, _ = self.net(img)

                priorbox = PriorBox(self.cfg, image_size=(im_height, im_width))
                priors = priorbox.forward()
                priors = priors.to(self.device)
                prior_data = priors.data
                boxes = decode(loc.data.squeeze(0), prior_data,
                               self.cfg['variance'])
                boxes = boxes * scale
                boxes = boxes.cpu().numpy()
                scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

                # ignore low scores
                inds = numpy.where(scores > CONF_THRESH)[0]
                boxes = boxes[inds]
                scores = scores[inds]

                # keep top-K before NMS
                # order = scores.argsort()[::-1][:args.top_k]
                order = scores.argsort()[::-1]
                boxes = boxes[order]
                scores = scores[order]

                # do NMS
                dets = numpy.hstack(
                    (boxes, scores[:, numpy.newaxis])).astype(numpy.float32,
                                                              copy=False)
                keep = py_cpu_nms(dets, NMS_THRESH)

                # keep top-K after NMS
                detections = dets[keep, :]

                if len(detections) > 0:
                    if return_best:
                        # detections is ordered by confidence so the first one is the best
                        det = numpy.squeeze(detections[0, 0:5])
                        bounding_box = numpy.zeros(5, dtype=numpy.float32)
                        # extend detection
                        extend_factor = self.face_rect_expand_factor
                        width = round(det[2] - det[0] + 1)
                        height = round(det[3] - det[1] + 1)
                        length = (width + height) / 2.0
                        centrepoint = [
                            round(det[0]) + width / 2.0,
                            round(det[1]) + height / 2.0
                        ]
                        bounding_box[0] = centrepoint[0] - round(
                            (1 + extend_factor) * length / 2.0)
                        bounding_box[1] = centrepoint[1] - round(
                            (1 + extend_factor) * length / 2.0)
                        bounding_box[2] = centrepoint[0] + round(
                            (1 + extend_factor) * length / 2.0)
                        bounding_box[3] = centrepoint[1] + round(
                            (1 + extend_factor) * length / 2.0)
                        # prevent going off image
                        bounding_box[0] = int(max(bounding_box[0], 0))
                        bounding_box[1] = int(max(bounding_box[1], 0))
                        bounding_box[2] = int(
                            min(bounding_box[2], img.shape[3]))
                        bounding_box[3] = int(
                            min(bounding_box[3], img.shape[2]))
                        bounding_box[4] = det[4]
                        return [bounding_box]
                    else:
                        det_list = []
                        for j in range(len(detections)):
                            det = numpy.squeeze(detections[j, 0:5])
                            bounding_box = numpy.zeros(5, dtype=numpy.float32)
                            # extend detection
                            extend_factor = self.face_rect_expand_factor
                            width = round(det[2] - det[0] + 1)
                            height = round(det[3] - det[1] + 1)
                            length = (width + height) / 2.0
                            centrepoint = [
                                round(det[0]) + width / 2.0,
                                round(det[1]) + height / 2.0
                            ]
                            bounding_box[0] = centrepoint[0] - round(
                                (1 + extend_factor) * length / 2.0)
                            bounding_box[1] = centrepoint[1] - round(
                                (1 + extend_factor) * length / 2.0)
                            bounding_box[2] = centrepoint[0] + round(
                                (1 + extend_factor) * length / 2.0)
                            bounding_box[3] = centrepoint[1] + round(
                                (1 + extend_factor) * length / 2.0)
                            # prevent going off image
                            bounding_box[0] = int(max(bounding_box[0], 0))
                            bounding_box[1] = int(max(bounding_box[1], 0))
                            bounding_box[2] = int(
                                min(bounding_box[2], img.shape[3]))
                            bounding_box[3] = int(
                                min(bounding_box[3], img.shape[2]))
                            bounding_box[4] = det[4]
                            det_list.append(bounding_box)
                        return det_list
                else:
                    return None

            except Exception as e:
                print('Exception in FaceDetectorRetinaFace: ' + str(e))

        return None
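The two branches of detect_faces duplicate the same square expansion and clamping. A hedged equivalent helper (an illustrative refactor, not part of the original class):

import numpy

def expand_and_clamp(det, extend_factor, img_w, img_h):
    # det: [x1, y1, x2, y2, score]; returns the expanded, clamped box
    width = round(det[2] - det[0] + 1)
    height = round(det[3] - det[1] + 1)
    half = round((1 + extend_factor) * ((width + height) / 2.0) / 2.0)
    cx = round(det[0]) + width / 2.0
    cy = round(det[1]) + height / 2.0
    box = numpy.zeros(5, dtype=numpy.float32)
    box[0] = int(max(cx - half, 0))
    box[1] = int(max(cy - half, 0))
    box[2] = int(min(cx + half, img_w))
    box[3] = int(min(cy + half, img_h))
    box[4] = det[4]
    return box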
Example #13
    def get_det(self, img):

        img_raw = cv2.imread(img, cv2.IMREAD_COLOR)

        img = np.float32(img_raw)

        im_height, im_width, _ = img.shape
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(self.device)
        scale = scale.to(self.device)

        tic = time.time()
        loc, conf, landms = self.net(img)  # forward pass
        print('net forward time: {:.4f}'.format(time.time() - tic))

        priorbox = PriorBox(self.cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(self.device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
        boxes = boxes * scale / self.resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data,
                              self.cfg['variance'])
        scale1 = torch.Tensor([
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2]
        ])
        scale1 = scale1.to(self.device)
        landms = landms * scale1 / self.resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > self.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:self.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
        keep = py_cpu_nms(dets, self.nms_threshold)
        # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K faster NMS
        dets = dets[:self.keep_top_k, :]
        landms = landms[:self.keep_top_k, :]

        dets = np.concatenate((dets, landms), axis=1)
        crops = []
        cords = []
        for det in dets:
            if det[4] < self.vis_thres:
                continue
            b = list(map(int, det))
            # clamp to image bounds so negative coordinates don't wrap around
            b[0], b[1] = max(b[0], 0), max(b[1], 0)
            crop = img_raw[b[1]:b[3], b[0]:b[2]].copy()
            if len(crops) < 10:  # keep at most 10 face crops
                crops.append(crop)
                cords.append(b)
        return crops, cords
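A hedged usage sketch for get_det (the detector object and the image path are assumptions; only the return contract follows from the code above):

import cv2

crops, cords = detector.get_det('photo.jpg')
for i, (crop, b) in enumerate(zip(crops, cords)):
    x1, y1, x2, y2 = b[:4]
    print(f'face {i}: ({x1}, {y1}) -> ({x2}, {y2})')
    cv2.imwrite(f'face_{i}.jpg', crop)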
Example #14
def detect(img_path):

    torch.set_grad_enabled(False)
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    #net = FaceBoxes(phase='test', size=None, num_classes=2)
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()

    #print('Finished loading model!')

    #print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    _t = {'forward_pass': Timer(), 'misc': Timer()}
    resize = 1
    # testing begin
    # for i, img_name in enumerate(test_dataset):
    #     image_path = testset_folder + img_name + '.jpg'
    #     img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)

    if not isinstance(img_path, np.ndarray):
        img = Image.open(img_path)
        if img.mode == 'L':
            img = img.convert('RGB')
        # PIL loads RGB, but the (104, 117, 123) mean below assumes BGR,
        # so flip the channel order to match the training pipeline
        img_raw = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    else:
        img_raw = img_path

    #img_raw = img_path
    img = np.float32(img_raw)
    if resize != 1:
        img = cv2.resize(img,
                         None,
                         None,
                         fx=resize,
                         fy=resize,
                         interpolation=cv2.INTER_LINEAR)
    im_height, im_width, _ = img.shape
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)

    _t['forward_pass'].tic()
    loc, conf, landms = net(img)  # forward pass
    _t['forward_pass'].toc()
    _t['misc'].tic()
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    #priorbox = PriorBox1(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    scale1 = torch.Tensor([
        img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3],
        img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2]
    ])
    scale1 = scale1.to(device)
    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()

    # ignore low scores
    inds = np.where(scores > args.confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS
    # order = scores.argsort()[::-1][:args.top_k]
    order = scores.argsort()[::-1]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = py_cpu_nms(dets, args.nms_threshold)

    dets = dets[keep, :]
    landms = landms[keep]

    # keep top-K faster NMS
    # dets = dets[:args.keep_top_k, :]
    # landms = landms[:args.keep_top_k, :]

    dets = np.concatenate((dets, landms), axis=1)
    _t['misc'].toc()

    # save dets
    # if args.dataset == "FDDB":
    #     fw.write('{:s}\n'.format(img_name))
    #     fw.write('{:.1f}\n'.format(dets.shape[0]))
    #     for k in range(dets.shape[0]):
    #         xmin = dets[k, 0]
    #         ymin = dets[k, 1]
    #         xmax = dets[k, 2]
    #         ymax = dets[k, 3]
    #         score = dets[k, 4]
    #         w = xmax - xmin + 1
    #         h = ymax - ymin + 1
    #         # fw.write('{:.3f} {:.3f} {:.3f} {:.3f} {:.10f}\n'.format(xmin, ymin, w, h, score))
    #         fw.write('{:d} {:d} {:d} {:d} {:.10f}\n'.format(int(xmin), int(ymin), int(w), int(h), score))
    print('forward_pass_time: {:.4f}s misc: {:.4f}s'.format(
        _t['forward_pass'].average_time, _t['misc'].average_time))

    # if type(img_path) is not np.ndarray:
    #     img_raw = cv2.imread(img_path, cv2.IMREAD_COLOR)
    # else:
    #     img_raw = img_path

    # # show image
    # if args.save_image:
    #     for b in dets:
    #         if b[4] < args.vis_thres:
    #             continue
    #         text = "{:.4f}".format(b[4])
    #         b = list(map(int, b))
    #         cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
    #         cx = b[0]
    #         cy = b[1] + 12
    #         cv2.putText(img_raw, text, (cx, cy),
    #                     cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

    #         # landms
    #         cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
    #         cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
    #         cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
    #         cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
    #         cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
    # save image
    # if not os.path.exists("./results/"):
    #     os.makedirs("./results/")
    # name = "./results/" + str(i) + ".jpg"
    # cv2.imwrite(name, img_raw)
    return dets, img_path
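The decoded boxes are normalized to [0, 1]; multiplying by scale = (w, h, w, h) of the resized image converts them to pixels, and dividing by resize maps them back to the original resolution. A small worked example (values are illustrative):

import numpy as np

resize = 0.5  # original image was shrunk to half size before the forward pass
scale = np.array([640, 480, 640, 480], dtype=np.float32)  # resized w, h, w, h
box_norm = np.array([0.25, 0.25, 0.5, 0.5], dtype=np.float32)
box_px = box_norm * scale / resize
print(box_px)  # [320. 240. 640. 480.] in the 1280x960 original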
Example #15
    def detect(self,
               img,
               origin_size=True,
               target_size=480,
               max_size=2150,
               confidence_threshold=0.7,
               nms_threshold=0.35,
               top_k=5000,
               keep_top_k=750):

        img, resize = self._process_image(img, origin_size, target_size,
                                          max_size)
        img = img.to(self.device)
        _, _, im_height, im_width = img.size()

        # anchor
        priorbox = PriorBox(self.cfg, image_size=(im_height, im_width))
        priors = priorbox.forward().to(self.device)
        prior_data = priors.data

        # forward
        loc, conf, landms = self(img)

        # decoder output
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
        scale = torch.Tensor([
            im_width,
            im_height,
            im_width,
            im_height,
        ]).to(self.device)
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        landms = decode_landm(landms.data.squeeze(0), prior_data,
                              self.cfg['variance'])
        scale1 = torch.Tensor([
            im_width,
            im_height,
            im_width,
            im_height,
            im_width,
            im_height,
            im_width,
            im_height,
            im_width,
            im_height,
        ]).to(self.device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1]
        # order = scores.argsort()[::-1][:top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
        keep = py_cpu_nms(dets, nms_threshold)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K faster NMS
        dets = dets[:keep_top_k, :]
        landms = landms[:keep_top_k, :]

        # dets = np.concatenate((dets, landms), axis=1)
        scores = dets[:, -1]  # (N,)
        dets = dets[:, :-1]  # (N, 4)
        landms = landms.reshape(-1, 5, 2)  # (N, 5, 2)
        return scores, dets, landms
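A usage sketch for this detect method (the model construction is an assumption; the return shapes follow from the code above):

scores, dets, landms = model.detect(image, origin_size=False, target_size=480)
# scores: (N,) confidences; dets: (N, 4) boxes as x1, y1, x2, y2;
# landms: (N, 5, 2) five (x, y) facial landmarks per detection
for score, box, pts in zip(scores, dets, landms):
    print(f'{score:.3f}', box.astype(int), pts[0])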
Example #16
def train():
    if args.dataset == 'COCO':
        cfg = coco
        dataset = COCODetection(root=cfg['coco_root'],
                                transform=SSDAugmentation(cfg['min_dim'],
                                                          MEANS))
    if args.dataset == 'VOC':
        cfg = voc
        dataset = VOCDetection(root=cfg['voc_root'],
                               transform=SSDAugmentation(cfg['min_dim'],
                                                         MEANS))

    if args.visdom:
        import visdom
        viz = visdom.Visdom()

    ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
    net = ssd_net

    if args.cuda:
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    else:
        vgg_weights = torch.load(args.save_folder + args.basenet)
        print('Loading base network...')
        ssd_net.vgg.load_state_dict(vgg_weights)

    if args.cuda:
        net = net.cuda()

    if not args.resume:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.extras.apply(weights_init)
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)

    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, args.cuda)

    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.cuda()
    
    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading the dataset...')

    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)

    step_index = 0

    if args.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)
    # create batch iterator
    batch_iterator = iter(data_loader)
    for iteration in range(args.start_iter, cfg['max_iter']):
        if args.visdom and iteration != 0 and (iteration % epoch_size == 0):
            update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
                            'append', epoch_size)
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1

        if iteration in cfg['lr_steps']:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)

        # load train data
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(data_loader)
            images, targets = next(batch_iterator)

        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(ann.cuda(), requires_grad=False) for ann in targets]
        else:
            images = Variable(images)
            targets = [Variable(ann, requires_grad=False) for ann in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()

        if iteration % 10 == 0:
            print('timer: %.4f sec.' % (t1 - t0))
            print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.item()), end=' ')

        if args.visdom:
            update_vis_plot(iteration, loss_l.item(), loss_c.item(),
                            iter_plot, epoch_plot, 'append')

        if iteration != 0 and iteration % 5000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(ssd_net.state_dict(), 'weights/ssd300_COCO_' +
                       repr(iteration) + '.pth')
    torch.save(ssd_net.state_dict(),
               args.save_folder + '' + args.dataset + '.pth')
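adjust_learning_rate is called above but not shown; in the ssd.pytorch reference code it is a simple step decay, roughly:

def adjust_learning_rate(optimizer, gamma, step):
    # decay the initial learning rate by gamma at each lr_steps milestone
    lr = args.lr * (gamma ** step)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr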
Example #17
    net.load_state_dict(new_state_dict)

if num_gpu > 1 and gpu_train:
    net = torch.nn.DataParallel(net).cuda()
else:
    net = net.cuda()

cudnn.benchmark = True

optimizer = optim.SGD(net.parameters(),
                      lr=initial_lr,
                      momentum=momentum,
                      weight_decay=weight_decay)
criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False)

priorbox = PriorBox(cfg, image_size=(img_dim, img_dim))
with torch.no_grad():
    priors = priorbox.forward()
    priors = priors.cuda()


def get_path_dict(train_dir):
    '''Get path dictionary for image paths with/without mask type.
       - key: image path without mask type
       - value: image path with mask type 
    '''
    MASK_TYPES = ('cloth', 'surgical_blue', 'surgical', 'KN95')
    path_dict = {}
    for img_path in glob.glob(f'{train_dir}/**/*.jpg'):
        for mtype in MASK_TYPES:
            if mtype in img_path:
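The snippet is truncated here; one plausible completion, assuming the mask type appears verbatim in the file name and can be stripped to form the key (the exact naming scheme is an assumption):

                key = img_path.replace(f'_{mtype}', '')
                path_dict[key] = img_path
                break
    return path_dict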
Example #18
            #net.setInput(blob)
            # Runs the forward pass to get output of the output layers
            scale = torch.Tensor([
                frame.shape[1], frame.shape[0], frame.shape[1], frame.shape[0]
            ])
            frame -= (104, 117, 123)
            frame = frame.transpose(2, 0, 1)
            frame = torch.from_numpy(frame).unsqueeze(0)
            frame = frame.to(device)
            scale = scale.to(device)

            _t['forward_pass'].tic()
            loc, conf = net(frame)  # forward pass
            _t['forward_pass'].toc()
            _t['misc'].tic()
            priorbox = PriorBox(cfg, image_size=(IMG_WIDTH, IMG_HEIGHT))
            priors = priorbox.forward()
            priors = priors.to(device)
            prior_data = priors.data
            boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
            boxes = boxes * scale / resize
            boxes = boxes.cpu().numpy()
            scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
            # Remove the bounding boxes with low confidence
            # faces = post_process(frame, outs, CONF_THRESHOLD, NMS_THRESHOLD)
            inds = np.where(scores > args.confidence_threshold)[0]
            boxes = boxes[inds]
            scores = scores[inds]
            # keep top-K before NMS
            order = scores.argsort()[::-1][:args.top_k]
            boxes = boxes[order]
Example #19
def main():
    args = arg_parse()

    filename, extension = splitext(basename(args.input))
    print("Loading file [{}] ....".format(filename))

    if not exists(args.input):
        raise ValueError("File [{}] is not recognized".format(args.input))

    if not isfile(args.trained_model):
        raise ValueError(f'The model {args.trained_model} is not found')

    if not exists(args.output_dir):
        print(
            f'Output directory {args.output_dir} does not exist; creating it')
        makedirs(args.output_dir)

    torch.set_grad_enabled(False)
    cfg = cfg_mnet if args.network == "mobile0.25" else cfg_re50
    resize = 1
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net = model_cfg(args.trained_model, cfg=cfg, device=device, cpu=args.cpu)

    if is_video(extension):
        vdo = cv2.VideoCapture()
        codec = cv2.VideoWriter_fourcc(*'XVID')
        output = join(args.output_dir, filename + '.avi')

        if vdo.open(args.input):
            total_frames = int(vdo.get(cv2.CAP_PROP_FRAME_COUNT))
            width = int(vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = vdo.get(cv2.CAP_PROP_FPS)
            writer = cv2.VideoWriter(output, codec, fps, (width, height))

            print('')
            print('processing video ...')
            frame_idx = 0
            with tqdm(range(total_frames)) as pbar:
                while vdo.grab():
                    frame_idx += 1
                    pbar.update(1)
                    ret, frame = vdo.retrieve()

                    if not ret:
                        break

                    args.step = max(args.step, 1)
                    if frame_idx % args.step == 0:
                        frame = pipeline(net, frame, args, device, resize, cfg)
                        writer.write(frame)

            print('process finished successfully. output is stored as {}'.format(output))

            vdo.release()
            writer.release()

    elif is_image(extension):
        frame = cv2.imread(args.input)
        img = np.float32(frame)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)

        loc, conf, landms = net(img)  # forward pass
        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data,
                              cfg['variance'])
        scale1 = torch.Tensor([
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2]
        ])
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
        keep = py_cpu_nms(dets, args.nms_threshold)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K faster NMS
        dets = dets[:args.keep_top_k, :]
        landms = landms[:args.keep_top_k, :]
        dets = np.concatenate((dets, landms), axis=1)

        objects_to_draw = dict(draw_box=True,
                               draw_text=False,
                               draw_landmarks=False)
        frame = draw(frame, dets, args.vis_thres, **objects_to_draw)

        output = join(args.output_dir, filename + '.jpg')
        cv2.imwrite(output, frame)
        print('output is stored as {}'.format(output))

    else:
        print('cannot read input {}: unsupported file type'.format(args.input))
Example #20
    def detect_faces(self, img_raw, mean=(104, 117, 123)):
        img = np.float32(img_raw)

        im_height, im_width, _ = img.shape
        scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= mean
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(self.device)
        scale = scale.to(self.device)

        tic = time.time()
        loc, conf, landms = self.net(img)  # forward pass
        # print('net forward time: {:.4f}'.format(time.time() - tic))

        priorbox = PriorBox(self.cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(self.device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
        boxes = boxes * scale / self.resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data, self.cfg['variance'])
        scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2]])
        scale1 = scale1.to(self.device)
        landms = landms * scale1 / self.resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > self.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:self.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, self.nms_threshold)
        # keep = nms(dets, self.nms_threshold,force_cpu=self.cpu)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K faster NMS
        dets = dets[:self.keep_top_k, :]
        landms = landms[:self.keep_top_k, :]

        dets = np.concatenate((dets, landms), axis=1)

        # show image
        if self.show_image:
            for b in dets:
                if b[4] < self.vis_thres:
                    continue
                text = "{:.4f}".format(b[4])
                b = list(map(int, b))
                cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
                cx = b[0]
                cy = b[1] + 12
                cv2.putText(img_raw, text, (cx, cy),
                            cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

                # landms
                cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
                cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
                cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
                cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
                cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)

                # Show image
                cv2.imshow('result', img_raw)
                cv2.waitKey(100)

        results = []
        for det in dets:
            box = det[:4]
            score = det[4]
            keypoints = det[5:]

            if score < self.vis_thres:
                continue
            results.append({'box':box.tolist(), 'score':score.tolist(), 'keypoints':keypoints.tolist()})

        return results
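detect_faces returns a list of dicts; a minimal usage sketch (the detector instance and the input file are assumptions):

import cv2

img = cv2.imread('group.jpg')  # BGR order, matching the (104, 117, 123) mean
for face in detector.detect_faces(img):
    x1, y1, x2, y2 = map(int, face['box'])
    print(f"score={face['score']:.3f} box=({x1}, {y1}, {x2}, {y2})")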
Example #21
def do_annotation_over_dir(args, _dir, device, nets, resize, cfg):
    exts = ['jpeg', 'png', 'jpg', 'bmp']
    _dir = _dir.replace('\n', '')
    last_dir = _dir.split('/')[-1]
    os.makedirs(f'auto_annotation_labels/{last_dir}', exist_ok=True)
    tagfile = os.path.join(f'auto_annotation_labels/{last_dir}',
                           'auto_tag.csv')
    if os.path.exists(tagfile):
        os.remove(tagfile)
    assert os.path.exists(_dir), f'{_dir} does not exist'
    for image_path in glob.glob(_dir + '/*'):
        img_name = os.path.split(image_path)[-1]
        ext = os.path.splitext(img_name)[-1].split('.')[-1]
        if ext not in exts:
            continue
        img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
        img_raws = [img_raw.copy(), img_raw.copy()]
        img = np.float32(img_raw)

        im_height, im_width, _ = img.shape
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)

        tic = time.time()
        all_boxes = []
        all_landms = []
        all_scores = []
        drawn_images = []
        for idx, net in enumerate(nets):
            loc, conf, landms = net(img)  # forward pass
            # print ('net forward time: {:.4f}'.format (time.time () - tic))
            priorbox = PriorBox(cfg, image_size=(im_height, im_width))
            priors = priorbox.forward()
            priors = priors.to(device)
            prior_data = priors.data
            boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
            boxes = boxes * scale / resize
            boxes = boxes.cpu().numpy()
            scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
            landms = decode_landm(landms.data.squeeze(0), prior_data,
                                  cfg['variance'])
            scale1 = torch.Tensor([
                img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                img.shape[3], img.shape[2]
            ])
            scale1 = scale1.to(device)
            landms = landms * scale1 / resize
            landms = landms.cpu().numpy()

            # ignore low scores
            inds = np.where(scores > args.confidence_threshold)[0]
            boxes = boxes[inds]
            landms = landms[inds]
            scores = scores[inds]

            all_boxes.append(boxes)
            all_landms.append(landms)
            all_scores.append(scores)

            # keep top-K before NMS
            order = scores.argsort()[::-1][:args.top_k]
            boxes = boxes[order]
            landms = landms[order]
            scores = scores[order]

            # do NMS
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                    copy=False)
            keep = py_cpu_nms(dets, args.nms_threshold)
            # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
            dets = dets[keep, :]
            landms = landms[keep]

            # keep top-K faster NMS
            dets = dets[:args.keep_top_k, :]
            landms = landms[:args.keep_top_k, :]

            dets = np.concatenate((dets, landms), axis=1)

            # show image
            if args.save_image:
                for b in dets:
                    if b[4] < args.vis_thres:
                        continue
                    text = "{:.4f}".format(b[4])
                    b = list(map(int, b))
                    cv2.rectangle(img_raws[idx], (b[0], b[1]), (b[2], b[3]),
                                  (0, 0, 255), 2)
                    cx = b[0]
                    cy = b[1] + 12
                    # cv2.putText (img_raws[idx], text, (cx, cy),
                    #              cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

                # landms
                # cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
                # cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
                # cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
                # cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)

                # cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
                # save image
                if idx == len(nets) - 1:  # last model in list of models
                    # name = os.path.join(os.getcwd (), f"det_results/{img_name}")
                    img_to_draw = np.vstack(img_raws)
                    # cv2.imwrite (name, cv2.resize(img_to_draw, dsize=None, fx=0.5, fy=0.5))

        all_boxes = np.concatenate(all_boxes, axis=0)
        all_landms = np.concatenate(all_landms, axis=0)
        all_scores = np.concatenate(all_scores, axis=0)
        # keep top-K before NMS
        order = all_scores.argsort()[::-1][:args.top_k]
        all_boxes = all_boxes[order]
        all_landms = all_landms[order]
        all_scores = all_scores[order]

        # do NMS
        dets = np.hstack(
            (all_boxes, all_scores[:, np.newaxis])).astype(np.float32,
                                                           copy=False)
        keep = py_cpu_nms(dets, args.nms_threshold)
        # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
        dets = dets[keep, :]
        all_landms = all_landms[keep]

        # keep top-K faster NMS
        dets = dets[:args.keep_top_k, :]
        all_landms = all_landms[:args.keep_top_k, :]
        dets = np.concatenate((dets, all_landms), axis=1)
        bbox = np.hstack([dets[:, :5], np.ones((dets.shape[0], 1))])
        write_csv(tagfile, bbox, file_name=img_name)

        # show image
        if args.save_image:
            for b in dets:
                if b[4] < args.vis_thres:
                    continue
                text = "{:.4f}".format(b[4])
                b = list(map(int, b))
                cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]),
                              (255, 0, 255), 2)
                cx = b[0]
                cy = b[1] + 12
            # cv2.putText(img_raw, text, (cx, cy),
            #             cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

            # landms
            # cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
            # cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
            # cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
            # cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)

            # cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
            # save image
            name = os.path.join(os.getcwd(), f"det_results/{img_name}")
            img_to_draw = cv2.resize(
                img_to_draw,
                dsize=(int(img_raw.shape[1] *
                           (img_raw.shape[0] / img_to_draw.shape[0])),
                       img_raw.shape[0]))
            img_to_draw = np.hstack([img_to_draw, img_raw])
            cv2.imwrite(name, img_to_draw)
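write_csv is called above but not defined in this excerpt; a minimal sketch consistent with the call site write_csv(tagfile, bbox, file_name=img_name) (the column layout is an assumption):

import csv

def write_csv(tagfile, bbox, file_name=''):
    # append one row per detection: file name, x1, y1, x2, y2, score, class
    with open(tagfile, 'a', newline='') as f:
        writer = csv.writer(f)
        for row in bbox:
            writer.writerow([file_name] + [float(v) for v in row])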
Example #22
        else:
            name = k
        new_state_dict[name] = v
    net.load_state_dict(new_state_dict)

if args.ngpu > 1 and gpu_train:
    net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))

device = torch.device('cuda:0' if gpu_train else 'cpu')
cudnn.benchmark = True
net = net.to(device)

optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False)

priorbox = PriorBox(cfg)
with torch.no_grad():
    priors = priorbox.forward()
    priors = priors.to(device)


def train():
    net.train()
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')

    dataset = VOCDetection(args.training_dataset, preproc(img_dim, rgb_means), AnnotationTransform())

    epoch_size = math.ceil(len(dataset) / args.batch_size)
    max_iter = args.max_epoch * epoch_size
Example #23
        img = np.float32(img_raw)

        im_height, im_width, _ = img.shape
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)

        tic = time.time()
        loc, conf, landms = net(img)  # forward pass
        print('net forward time: {:.4f}'.format(time.time() - tic))

        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data,
                              cfg['variance'])
        scale1 = torch.Tensor([
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2]
        ])
        scale1 = scale1.to(device)
Example #24
def detect(net,
           img,
           cfg,
           size=(200, 400),
           confidence_threshold=0.02,
           nms_threshold=0.4):
    # testing scale
    img = np.float32(img)
    device = torch.device('cuda')
    target_size = size[0]
    max_size = size[1]
    im_shape = img.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    resize = float(target_size) / float(im_size_min)
    # prevent bigger axis from being more than max_size:
    if np.round(resize * im_size_max) > max_size:
        resize = float(max_size) / float(im_size_max)
    # if args.origin_size:
    #     resize = 1

    if resize != 1:
        img = cv2.resize(img,
                         None,
                         None,
                         fx=resize,
                         fy=resize,
                         interpolation=cv2.INTER_LINEAR)
    im_height, im_width, _ = img.shape
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)

    loc, conf, landms = net(img)  # forward pass
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    scale1 = torch.Tensor([
        img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3],
        img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2]
    ])
    scale1 = scale1.to(device)
    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()

    # ignore low scores
    inds = np.where(scores > confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1]
    # order = scores.argsort()[::-1][:args.top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = py_cpu_nms(dets, nms_threshold)
    dets = dets[keep, :]
    landms = landms[keep]

    dets = np.concatenate((dets, landms), axis=1)
    return dets
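The resize rule above scales the short side to target_size, then backs off if the long side would exceed max_size. Factored out for illustration (compute_resize is a hypothetical helper):

import numpy as np

def compute_resize(im_shape, target_size=200, max_size=400):
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    resize = float(target_size) / float(im_size_min)
    # prevent the bigger axis from exceeding max_size
    if np.round(resize * im_size_max) > max_size:
        resize = float(max_size) / float(im_size_max)
    return resize

print(compute_resize((300, 900, 3)))  # 0.444...: the 900 px side is capped at 400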
Example #25
def main():
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet18":
        cfg = cfg_re18
    elif args.network == "resnet34":
        cfg = cfg_re34
    elif args.network == "resnet50":
        cfg = cfg_re50
    elif args.network == "Efficientnet-b0":
        cfg = cfg_eff_b0
    elif args.network == "Efficientnet-b4":
        cfg = cfg_eff_b4
    elif args.network == "resnet34_hsfd":
        cfg = cfg_re34_hsfd_finetune
    elif args.network == "resnet34_hsfd_not_finetune":
        cfg = cfg_re34_hsfd_not_finetune

    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print('Finished loading model!')
    print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    # # testing dataset
    # testset_folder = args.dataset_folder
    # # testset_list = args.dataset_folder[:-7] + "wider_val.txt"
    # # with open(testset_list, 'r') as fr:
    # #     test_dataset = fr.read().split()
    # test_dataset = []
    # for event in os.listdir(testset_folder):
    #     subdir = os.path.join(testset_folder, event)
    #     img_names = os.listdir(subdir)
    #     for img_name in img_names:
    #         test_dataset.append([event, os.path.join(subdir, img_name)])
    # num_images = len(test_dataset)

    used_channels = cfg['used_channels']
    img_dim = cfg['image_size']
    test_dataset = EcustHsfdDetection(args.dataset_file,
                                      used_channels,
                                      preproc=valid_preproc(img_dim, None),
                                      mode='valid')
    num_images = len(test_dataset)
    datadir = os.path.dirname(args.dataset_file)

    pred_file = f'{args.save_folder}_pred.txt'
    gt_file = f'{args.save_folder}_gt.txt'
    fp1 = open(pred_file, 'w')
    fp2 = open(gt_file, 'w')

    _t = {'forward_pass': Timer(), 'misc': Timer()}

    # testing begin
    for i, img_name in enumerate(test_dataset.imgs_path):
        if i % 100 == 0:
            torch.cuda.empty_cache()

        # image_path = testset_folder + img_name
        img_raw = load_datacube(img_name)[..., used_channels]
        # img_raw = cv2.imread(img_name, cv2.IMREAD_COLOR)
        img = np.float32(img_raw)

        # testing scale
        target_size = img_dim
        max_size = 2150
        im_shape = img.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        resize = float(target_size) / float(im_size_min)
        # prevent bigger axis from being more than max_size:
        if np.round(resize * im_size_max) > max_size:
            resize = float(max_size) / float(im_size_max)
        if args.origin_size:
            resize = 1

        if resize != 1:

            img = np.stack([
                cv2.resize(img[..., i], None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR) \
                for i in range(img.shape[-1])
            ], axis=-1)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img = (img - 127.5) / 128.0
        # img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)

        _t['forward_pass'].tic()
        loc, conf, landms = net(img)  # forward pass
        _t['forward_pass'].toc()
        _t['misc'].tic()
        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data,
                              cfg['variance'])
        scale1 = torch.Tensor([
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2]
        ])
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1]
        # order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
        keep = py_cpu_nms(dets, args.nms_threshold)
        # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K faster NMS
        dets = dets[:args.keep_top_k, :]
        landms = landms[:args.keep_top_k, :]

        prediction = np.concatenate((dets, landms), axis=1)
        _t['misc'].toc()

        # --------------------------------------------------------------------
        # save_name = os.path.join(args.save_folder, img_name.split('/')[-1].split('.')[0] + ".txt")
        # dirname = os.path.dirname(save_name)
        # if not os.path.isdir(dirname):
        #     os.makedirs(dirname)
        # with open(save_name, "w") as fd:
        #     bboxs = dets
        #     file_name = os.path.basename(save_name)[:-4] + "\n"
        #     bboxs_num = str(len(bboxs)) + "\n"
        #     fd.write(file_name)
        #     fd.write(bboxs_num)
        #     for box in bboxs:
        #         x = int(box[0])
        #         y = int(box[1])
        #         w = int(box[2]) - int(box[0])
        #         h = int(box[3]) - int(box[1])
        #         confidence = str(box[4])
        #         line = str(x) + " " + str(y) + " " + str(w) + " " + str(h) + " " + confidence + " \n"
        #         fd.write(line)

        fp1.write(f"# {os.path.relpath(img_name, datadir)}\n")
        if dets.shape[0] > 0:

            dets = prediction[0][:4].astype(int).tolist()  # np.int is removed in NumPy >= 1.24
            dets[2] -= dets[0]  # convert x2, y2 to width, height
            dets[3] -= dets[1]
            landms = prediction[0][4:14]
            scores = prediction[0][14]

            # 4 box values + 10 landmark values + 1 score = 15 fields
            label = [0. for _ in range(15)]
            label[-1] = scores
            label[:4] = dets
            label[4:-1] = landms
            label = ' '.join(list(map(str, label)))
            fp1.write(f'{label}\n')

        gt_label = ' '.join(list(map(str, test_dataset.words[i][0])))
        fp2.write(f"# {os.path.relpath(img_name, datadir)}\n")
        fp2.write(f'{gt_label}\n')
        print('im_detect: {:d}/{:d} forward_pass_time: {:.4f}s misc: {:.4f}s'.
              format(i + 1, num_images, _t['forward_pass'].average_time,
                     _t['misc'].average_time))

        # # save image
        # if args.save_image:
        #     for b in dets:
        #         if b[4] < args.vis_thres:
        #             continue
        #         text = "{:.4f}".format(b[4])
        #         b = list(map(int, b))
        #         cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
        #         cx = b[0]
        #         cy = b[1] + 12
        #         cv2.putText(img_raw, text, (cx, cy),
        #                     cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

        #         # landms
        #         cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
        #         cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
        #         cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
        #         cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
        #         cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
        #     # save image
        #     if not os.path.exists("./results/"):
        #         os.makedirs("./results/")
        #     name = "./results/" + str(i) + ".jpg"
        #     cv2.imwrite(name, img_raw)

    fp1.close()
    fp2.close()
Example #26
def train():
    if args.dataset == 'COCO':
        if args.dataset_root == VOC_ROOT:
            if not os.path.exists(COCO_ROOT):
                parser.error('Must specify dataset_root if specifying dataset')
            print("WARNING: Using default COCO dataset_root because " +
                  "--dataset_root was not specified.")
            args.dataset_root = COCO_ROOT
        cfg = coco
        dataset = COCODetection(root=args.dataset_root,
                                transform=SSDAugmentation(
                                    cfg['min_dim'], MEANS))
    elif args.dataset == 'VOC':
        if args.dataset_root == COCO_ROOT:
            parser.error('Must specify dataset if specifying dataset_root')
        cfg = voc
        dataset = VOCDetection(root=args.dataset_root,
                               transform=SSDAugmentation(
                                   cfg['min_dim'], MEANS))

    ssd_net = build_net('train', cfg['min_dim'], cfg['num_classes'])
    net = ssd_net

    if args.cuda:
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    # else:
    #     vgg_weights = torch.load(args.save_folder + args.basenet)
    #     print('Loading base network...')
    #     ssd_net.vgg.load_state_dict(vgg_weights)

    if args.cuda:
        net = net.cuda()

    if not args.resume:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)

    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, args.cuda)

    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.cuda()

    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading the dataset...')

    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)

    step_index = 0

    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)
    # create batch iterator
    batch_iterator = iter(data_loader)
    for iteration in range(args.start_iter, cfg['max_iter']):

        # reset epoch loss counters once per epoch, not every iteration
        if iteration != 0 and iteration % epoch_size == 0:
            loc_loss = 0
            conf_loss = 0
            epoch += 1

        if iteration in cfg['lr_steps']:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)

        # load train data
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(data_loader)
            images, targets = next(batch_iterator)

        if args.cuda:
            images = Variable(images.cuda())
            # volatile was removed in PyTorch 0.4; requires_grad=False is the modern equivalent
            targets = [Variable(ann.cuda(), requires_grad=False) for ann in targets]
        else:
            images = Variable(images)
            targets = [Variable(ann, requires_grad=False) for ann in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()

        if iteration % 10 == 0:
            print('timer: %.4f sec.' % (t1 - t0))
            print('iter ' + repr(iteration) + ' || Loss: %.4f ||' %
                  (loss.item()),
                  end=' ')

        if iteration != 0 and iteration % 5000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(
                ssd_net.state_dict(),
                'weights/Mobile-Net-ssd300_COCO_' + repr(iteration) + '.pth')
    torch.save(ssd_net.state_dict(),
               args.save_folder + '' + args.dataset + '.pth')
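weights_init is applied above but not defined in these excerpts; in ssd.pytorch it is xavier initialization for the newly added conv layers, roughly:

import torch.nn as nn
import torch.nn.init as init

def weights_init(m):
    # xavier-initialize conv weights, zero the biases
    if isinstance(m, nn.Conv2d):
        init.xavier_uniform_(m.weight.data)
        m.bias.data.zero_()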