Exemplo n.º 1
0
def get_dataset(datasetnames):
    """Build one RoiDataset by concatenating every '+'-separated imdb name."""
    parts = datasetnames.split('+')
    combined = RoiDataset(get_imdb(parts[0]))
    print('load dataset {}'.format(parts[0]))
    for extra_name in parts[1:]:
        combined += RoiDataset(get_imdb(extra_name))
        print('load and add dataset {}'.format(extra_name))
    return combined
Exemplo n.º 2
0
def get_dataset(datasetnames):
    """Concatenate RoiDatasets for each imdb named in *datasetnames*.

    *datasetnames* is a '+'-separated string such as
    'voc_2007_trainval+voc_2012_trainval'.
    """
    all_names = datasetnames.split('+')
    # Seed with the first imdb, then merge the rest in order.
    merged = RoiDataset(get_imdb(all_names[0]))
    print('load dataset {}'.format(all_names[0]))
    for nm in all_names[1:]:
        addition = RoiDataset(get_imdb(nm))
        merged += addition
        print('load and add dataset {}'.format(nm))
    return merged
def train():
    """Train a Yolov2 detector on the 'trainval' imdb and checkpoint it.

    Hyper-parameters come from ``parse_args()`` with several values copied
    from the global ``cfg``.  Supports resuming from a saved checkpoint,
    multi-GPU training via ``nn.DataParallel``, multi-scale input sizes, and
    optional tensorboardX logging.  Checkpoints named
    ``yolov2_epoch_{epoch}.pth`` are written to ``args.output_dir``.
    """

    # define the hyper parameters first
    args = parse_args()
    args.steplr_epoch = cfg.steplr_epoch
    args.steplr_factor = cfg.steplr_factor
    args.weight_decay = cfg.weight_decay
    args.momentum = cfg.momentum

    print('Called with args:')
    print(args)

    lr = args.lr

    # initial tensorboardX writer
    # NOTE: `writer` only exists when args.use_tfboard is set; all later
    # uses are guarded by the same flag.
    if args.use_tfboard:
        if args.exp_name == 'default':
            writer = SummaryWriter()
        else:
            writer = SummaryWriter('runs/' + args.exp_name)

    args.imdb_name = 'trainval'
    args.imdbval_name = 'trainval'

    output_dir = args.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # load dataset
    print('loading dataset....')
    train_dataset = RoiDataset(get_imdb(args.imdb_name))

    print('dataset loaded.')

    print('training rois number: {}'.format(len(train_dataset)))
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  collate_fn=detection_collate,
                                  drop_last=True)

    # initialize the model
    print('initialize the model')
    tic = time.time()
    model = Yolov2(pretrained=True, arch=args.arch)
    toc = time.time()
    print('model loaded: cost time {:.2f}s'.format(toc - tic))

    # initialize the optimizer
    # The backbone ('trunk') uses a scaled-down learning rate
    # (args.lr * cfg.former_lr_decay); the detection head layers conv3/conv4
    # fall back to the group default lr=args.lr.
    optimizer = optim.SGD([{
        "params": model.trunk.parameters(),
        "lr": args.lr * cfg.former_lr_decay
    }, {
        "params": model.conv3.parameters()
    }, {
        "params": model.conv4.parameters()
    }],
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    scheduler = StepLR(optimizer,
                       step_size=args.steplr_epoch,
                       gamma=args.steplr_factor)

    if args.resume:
        # Restore weights, epoch counter and learning rate from the
        # checkpoint selected by args.checkpoint_epoch.
        print('resume training enable')
        resume_checkpoint_name = 'yolov2_epoch_{}.pth'.format(
            args.checkpoint_epoch)
        resume_checkpoint_path = os.path.join(output_dir,
                                              resume_checkpoint_name)
        print('resume from {}'.format(resume_checkpoint_path))
        checkpoint = torch.load(resume_checkpoint_path)
        model.load_state_dict(checkpoint['model'])
        args.start_epoch = checkpoint['epoch'] + 1
        lr = checkpoint['lr']
        print('learning rate is {}'.format(lr))
        adjust_learning_rate(optimizer, lr)

    if args.use_cuda:
        model.cuda()

    if args.mGPUs:
        model = nn.DataParallel(model)

    # set the model mode to train because we have some layer whose behaviors are different when in training and testing.
    # such as Batch Normalization Layer.
    model.train()

    iters_per_epoch = int(len(train_dataset) / args.batch_size)

    # start training
    for epoch in range(args.start_epoch, args.max_epochs + 1):
        loss_temp = 0
        tic = time.time()
        train_data_iter = iter(train_dataloader)

        # NOTE(review): scheduler.step() is called at the start of each
        # epoch, i.e. before any optimizer.step() of that epoch; recent
        # PyTorch recommends stepping the scheduler after the optimizer —
        # confirm this ordering is intended.
        scheduler.step()
        lr = get_lr(optimizer)

        # Multi-scale schedule: some epochs switch to a different range of
        # candidate input sizes.
        if cfg.multi_scale and epoch in cfg.epoch_scale:
            cfg.scale_range = cfg.epoch_scale[epoch]
            print('change scale range to {}'.format(cfg.scale_range))

        for step in range(iters_per_epoch):

            # Periodically pick a new random input size from the current
            # scale range (multi-scale training).
            if cfg.multi_scale and (step + 1) % cfg.scale_step == 0:
                scale_index = np.random.randint(*cfg.scale_range)
                cfg.input_size = cfg.input_sizes[scale_index]
                ##print('change input size {}'.format(cfg.input_size))

            im_data, boxes, gt_classes, num_obj = next(train_data_iter)
            if args.use_cuda:
                im_data = im_data.cuda()
                boxes = boxes.cuda()
                gt_classes = gt_classes.cuda()
                num_obj = num_obj.cuda()

            im_data_variable = Variable(im_data)

            # Forward pass returns the three loss components; .mean() folds
            # the per-GPU values when running under DataParallel.
            box_loss, iou_loss, class_loss = model(im_data_variable,
                                                   boxes,
                                                   gt_classes,
                                                   num_obj,
                                                   training=True)

            loss = box_loss.mean()+ iou_loss.mean() \
                   + class_loss.mean()

            optimizer.zero_grad()

            loss.backward()
            optimizer.step()

            loss_temp += loss.item()

            if (step + 1) % args.display_interval == 0:
                toc = time.time()
                # loss_temp is averaged over the display window...
                loss_temp /= args.display_interval

                # ...but the individual components below are from the last
                # step only, not window averages.
                iou_loss_v = iou_loss.mean().item()
                box_loss_v = box_loss.mean().item()
                class_loss_v = class_loss.mean().item()

                log = "[epoch %2d][step %4d/%4d] loss: %.4f, lr: %.2e, iou_loss: %.4f, box_loss: %.4f, cls_loss: %.4f" \
                      % (epoch, step+1, iters_per_epoch, loss_temp, lr, iou_loss_v, box_loss_v, class_loss_v)
                print(log)

                # Append the same line to a persistent text log.
                logfile = os.path.join(output_dir, 'training_log.txt')
                with open(logfile, 'a') as f:
                    print(log, file=f)

                if args.use_tfboard:

                    n_iter = (epoch - 1) * iters_per_epoch + step + 1
                    writer.add_scalar('lr', lr, n_iter)
                    writer.add_scalar('losses/loss', loss_temp, n_iter)
                    writer.add_scalar('losses/iou_loss', iou_loss_v, n_iter)
                    writer.add_scalar('losses/box_loss', box_loss_v, n_iter)
                    writer.add_scalar('losses/cls_loss', class_loss_v, n_iter)

                # Reset the window accumulator and timer.
                loss_temp = 0
                tic = time.time()

        if epoch % args.save_interval == 0:
            # Unwrap DataParallel before saving so the checkpoint loads into
            # a plain (single-GPU) model as well.
            save_name = os.path.join(output_dir,
                                     'yolov2_epoch_{}.pth'.format(epoch))
            torch.save(
                {
                    'model':
                    model.module.state_dict()
                    if args.mGPUs else model.state_dict(),
                    'epoch':
                    epoch,
                    'lr':
                    lr
                }, save_name)
Exemplo n.º 4
0
def train():
    """Train a FasterRCNN detector on a VOC imdb (batch size 1 only).

    Hyper-parameters come from ``cfg.TRAIN``; dataset choice and runtime
    flags come from ``parse_args()``.  Checkpoints named
    ``faster_{net}_epoch_{epoch}.pth`` are written to ``args.output_dir``,
    with optional tensorboardX logging of losses and FG/BG accuracies.
    """
    args = parse_args()
    args.decay_lrs = cfg.TRAIN.DECAY_LRS

    cfg.USE_GPU_NMS = True if args.use_cuda else False

    assert args.batch_size == 1, 'Only support single batch'

    lr = cfg.TRAIN.LEARNING_RATE
    momentum = cfg.TRAIN.MOMENTUM
    weight_decay = cfg.TRAIN.WEIGHT_DECAY
    gamma = cfg.TRAIN.GAMMA

    # initial tensorboardX writer
    # NOTE: `writer` only exists when args.use_tfboard is set; later uses
    # are guarded by the same flag.
    if args.use_tfboard:
        if args.exp_name == 'default':
            writer = SummaryWriter()
        else:
            writer = SummaryWriter('runs/' + args.exp_name)

    # Map the user-facing dataset flag to concrete imdb names.
    if args.dataset == 'voc07trainval':
        args.imdb_name = 'voc_2007_trainval'
        args.imdbval_name = 'voc_2007_test'

    elif args.dataset == 'voc0712trainval':
        args.imdb_name = 'voc_2007_trainval+voc_2012_trainval'
        args.imdbval_name = 'voc_2007_test'
    else:
        raise NotImplementedError

    # Select the Caffe-converted pretrained backbone weights.
    if args.net == 'res50':
        fname = 'resnet50-caffe.pth'
    elif args.net == 'res101':
        fname = 'resnet101-caffe.pth'
    else:
        raise NotImplementedError

    args.pretrained_model = os.path.join('data', 'pretrained', fname)

    output_dir = args.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # dataset_cachefile = os.path.join(output_dir, 'dataset.pickle')
    # if not os.path.exists(dataset_cachefile):
    #     imdb, roidb = combined_roidb(args.imdb_name)
    #     cache = [imdb, roidb]
    #     with open(dataset_cachefile, 'wb') as f:
    #         pickle.dump(cache, f)
    #     print('save dataset cache')
    # else:
    #     with open(dataset_cachefile, 'rb') as f:
    #         cache = pickle.load(f)
    #         imdb, roidb = cache[0], cache[1]
    #         print('loaded dataset from cache')

    imdb, roidb = combined_roidb(args.imdb_name)

    train_dataset = RoiDataset(roidb)
    train_dataloader = DataLoader(train_dataset, args.batch_size, shuffle=True)

    model = FasterRCNN(backbone=args.net, pretrained=args.pretrained_model)
    print('model loaded')

    # if cfg.PRETRAINED_RPN:
    #     rpn_model_path = 'output/rpn.pth'
    #     model.load_state_dict(torch.load(rpn_model_path)['model'])
    #     print('loaded rpn!')

    # optimizer
    # Per-parameter groups: bias terms get lr doubled when
    # cfg.TRAIN.DOUBLE_BIAS is truthy, and weight decay only when
    # cfg.TRAIN.BIAS_DECAY is truthy (the `and/or` chain picks
    # weight_decay or 0).
    params = []
    for key, value in dict(model.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1), \
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and weight_decay or 0}]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': weight_decay
                }]

    optimizer = SGD(params, momentum=momentum)

    if args.use_cuda:
        model = model.cuda()

    model.train()

    iters_per_epoch = int(len(train_dataset) / args.batch_size)

    # start training
    for epoch in range(args.start_epoch, args.max_epochs + 1):
        loss_temp = 0
        # Running FG/BG classification counters, reset each display window.
        rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
        rcnn_tp, rcnn_tn, rcnn_fg, rcnn_bg = 0, 0, 0, 0
        tic = time.time()
        train_data_iter = iter(train_dataloader)

        # Step-wise lr decay at the configured epochs.
        if epoch in args.decay_lrs:
            lr = lr * gamma
            adjust_learning_rate(optimizer, lr)
            print('adjust learning rate to {}'.format(lr))

        for step in range(iters_per_epoch):
            im_data, gt_boxes, im_info = next(train_data_iter)
            if args.use_cuda:
                im_data = im_data.cuda()
                gt_boxes = gt_boxes.cuda()
                im_info = im_info.cuda()

            im_data_variable = Variable(im_data)

            output = model(im_data_variable, gt_boxes, im_info)
            rois, _, _, \
            rcnn_cls_loss, rcnn_box_loss, \
            rpn_cls_loss, rpn_box_loss, _train_info = output

            # Total loss: RPN + RCNN classification and box-regression terms.
            loss = rcnn_cls_loss.mean() + rcnn_box_loss.mean() +\
                   rpn_cls_loss.mean() + rpn_box_loss.mean()

            optimizer.zero_grad()

            loss.backward()
            optimizer.step()

            loss_temp += loss.item()

            if cfg.VERBOSE:
                rpn_tp += _train_info['rpn_tp']
                rpn_tn += _train_info['rpn_tn']
                rpn_fg += _train_info['rpn_num_fg']
                rpn_bg += _train_info['rpn_num_bg']
                rcnn_tp += _train_info['rcnn_tp']
                rcnn_tn += _train_info['rcnn_tn']
                rcnn_fg += _train_info['rcnn_num_fg']
                rcnn_bg += _train_info['rcnn_num_bg']

            if (step + 1) % args.display_interval == 0:
                toc = time.time()
                # Averaged over the display window; the per-component values
                # below are from the last step only.
                loss_temp /= args.display_interval
                rpn_cls_loss_v = rpn_cls_loss.mean().item()
                rpn_box_loss_v = rpn_box_loss.mean().item()
                rcnn_cls_loss_v = rcnn_cls_loss.mean().item()
                rcnn_box_loss_v = rcnn_box_loss.mean().item()

                print("[epoch %2d][step %4d/%4d] loss: %.4f, lr: %.2e, time cost %.1fs" \
                      % (epoch, step+1, iters_per_epoch, loss_temp, lr, toc - tic))
                print("\t\t\t rpn_cls_loss_v: %.4f, rpn_box_loss_v: %.4f\n\t\t\t "
                      "rcnn_cls_loss_v: %.4f, rcnn_box_loss_v: %.4f" \
                      % (rpn_cls_loss_v, rpn_box_loss_v, rcnn_cls_loss_v, rcnn_box_loss_v))
                if cfg.VERBOSE:
                    # NOTE(review): divides by rpn_fg/rpn_bg (and the rcnn
                    # counterparts) — would raise ZeroDivisionError if a
                    # window had no fg/bg samples; confirm that cannot occur.
                    print('\t\t\t RPN : [FG/BG] [%d/%d], FG: %.4f, BG: %.4f' %
                          (rpn_fg, rpn_bg, float(rpn_tp) / rpn_fg,
                           float(rpn_tn) / rpn_bg))
                    print('\t\t\t RCNN: [FG/BG] [%d/%d], FG: %.4f, BG: %.4f' %
                          (rcnn_fg, rcnn_bg, float(rcnn_tp) / rcnn_fg,
                           float(rcnn_tn) / rcnn_bg))

                if args.use_tfboard:
                    n_iter = (epoch - 1) * iters_per_epoch + step + 1
                    writer.add_scalar('losses/loss', loss_temp, n_iter)
                    writer.add_scalar('losses/rpn_cls_loss_v', rpn_cls_loss_v,
                                      n_iter)
                    writer.add_scalar('losses/rpn_box_loss_v', rpn_box_loss_v,
                                      n_iter)
                    writer.add_scalar('losses/rcnn_cls_loss_v',
                                      rcnn_cls_loss_v, n_iter)
                    writer.add_scalar('losses/rcnn_box_loss_v',
                                      rcnn_box_loss_v, n_iter)

                    if cfg.VERBOSE:
                        writer.add_scalar('rpn/fg_acc',
                                          float(rpn_tp) / rpn_fg, n_iter)
                        writer.add_scalar('rpn/bg_acc',
                                          float(rpn_tn) / rpn_bg, n_iter)
                        writer.add_scalar('rcnn/fg_acc',
                                          float(rcnn_tp) / rcnn_fg, n_iter)
                        writer.add_scalar('rcnn/bg_acc',
                                          float(rcnn_tn) / rcnn_bg, n_iter)

                # Reset window accumulators and timer.
                loss_temp = 0
                rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
                rcnn_tp, rcnn_tn, rcnn_fg, rcnn_bg = 0, 0, 0, 0
                tic = time.time()

        if epoch % args.save_interval == 0:
            save_name = os.path.join(
                output_dir, 'faster_{}_epoch_{}.pth'.format(args.net, epoch))
            torch.save({
                'model': model.state_dict(),
                'epoch': epoch,
                'lr': lr
            }, save_name)
Exemplo n.º 5
0
def test():
    """Run a Yolov2 checkpoint over a validation imdb and export detections.

    Produces three outputs in ``args.output_dir``:
    - ``detections.json``: per-image labels with box2d coordinates,
    - ``detections.pkl``: the raw ``all_boxes`` structure,
    - the imdb's own evaluation (``evaluate_detections``).
    With ``--vis`` it also displays each image's detections via matplotlib.
    """
    args = parse_args()
    # Low confidence threshold for mAP evaluation; higher when visualizing.
    args.conf_thresh = 0.005
    args.nms_thresh = 0.45
    if args.vis:
        args.conf_thresh = 0.5
    print('Called with args:')
    print(args)

    # prepare dataset

    val_imdb = get_imdb(args.dataset)

    val_dataset = RoiDataset(val_imdb, train=False)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=args.batch_size,
                                shuffle=False)

    # load model
    model = Yolov2(arch=args.arch)
    # weight_loader = WeightLoader()
    # weight_loader.load(model, 'yolo-voc.weights')
    # print('loaded')

    model_path = os.path.join(args.output_dir, args.model_name + '.pth')
    print('loading model from {}'.format(model_path))
    # Fall back to CPU map_location when no GPU is available.
    if torch.cuda.is_available():
        checkpoint = torch.load(model_path)
    else:
        checkpoint = torch.load(model_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])

    if args.use_cuda:
        model.cuda()

    model.eval()
    print('model loaded')

    dataset_size = len(val_imdb.image_index)
    print('classes: ', val_imdb.num_classes)

    # all_boxes[cls][img] holds an (N, 5) array of [x1, y1, x2, y2, score]
    # for class `cls` in image `img` (empty list when no detections).
    all_boxes = [[[] for _ in range(dataset_size)]
                 for _ in range(val_imdb.num_classes)]

    det_file = os.path.join(args.output_dir, 'detections.pkl')

    results = []

    img_id = -1
    with torch.no_grad():
        for batch, (im_data, im_infos) in enumerate(val_dataloader):
            if args.use_cuda:
                im_data_variable = Variable(im_data).cuda()
            else:
                im_data_variable = Variable(im_data)

            yolo_outputs = model(im_data_variable)
            # Post-process each image of the batch separately.
            for i in range(im_data.size(0)):
                img_id += 1
                output = [item[i].data for item in yolo_outputs]
                im_info = {'width': im_infos[i][0], 'height': im_infos[i][1]}
                detections = yolo_eval(output,
                                       im_info,
                                       conf_threshold=args.conf_thresh,
                                       nms_threshold=args.nms_thresh)

                if img_id % 100 == 0:
                    print('im detect [{}/{}]'.format(img_id + 1,
                                                     len(val_dataset)))
                if len(detections) > 0:
                    # Scatter detections into all_boxes by predicted class;
                    # final score = objectness (col 4) * class prob (col 5).
                    for cls in range(val_imdb.num_classes):
                        inds = torch.nonzero(detections[:, -1] == cls).view(-1)
                        if inds.numel() > 0:
                            cls_det = torch.zeros((inds.numel(), 5))
                            cls_det[:, :4] = detections[inds, :4]
                            cls_det[:, 4] = detections[inds,
                                                       4] * detections[inds, 5]
                            all_boxes[cls][img_id] = cls_det.cpu().numpy()

                # NOTE(review): `img` is never used in this branch (only the
                # vis branch reopens and uses the image) — looks redundant.
                img = Image.open(val_imdb.image_path_at(img_id))
                if len(detections) > 0:
                    # Build the JSON-export record for this image.
                    detect_result = {}

                    boxes = detections[:, :5].cpu().numpy()
                    classes = detections[:, -1].long().cpu().numpy()
                    class_names = val_imdb.classes

                    num_boxes = boxes.shape[0]

                    labels = []

                    for i in range(num_boxes):
                        # NOTE(review): det_bbox/disp_str are computed but
                        # unused; `bbox` below recomputes the same rounding.
                        det_bbox = tuple(
                            np.round(boxes[i, :4]).astype(np.int64))
                        score = boxes[i, 4]
                        gt_class_ind = classes[i]
                        class_name = class_names[gt_class_ind]
                        disp_str = '{}: {:.2f}'.format(class_name, score)

                        bbox = tuple(np.round(boxes[i, :4]).astype(np.int64))

                        xmin = bbox[0]
                        ymin = bbox[1]
                        xmax = bbox[2]
                        ymax = bbox[3]

                        # Coordinates are serialized as strings in the JSON.
                        box2d = {}
                        box2d["x1"] = str(xmin)
                        box2d["y1"] = str(ymin)
                        box2d["x2"] = str(xmax)
                        box2d["y2"] = str(ymax)

                        bbox = {}
                        bbox["box2d"] = box2d
                        bbox["category"] = class_name

                        labels.append(bbox)

                    detect_result["ImageID"] = os.path.basename(
                        val_imdb.image_path_at(img_id))
                    detect_result["labels"] = labels

                    results.append(detect_result)

                if args.vis:
                    img = Image.open(val_imdb.image_path_at(img_id))
                    if len(detections) == 0:
                        continue
                    det_boxes = detections[:, :5].cpu().numpy()
                    det_classes = detections[:, -1].long().cpu().numpy()
                    im2show = draw_detection_boxes(
                        img,
                        det_boxes,
                        det_classes,
                        class_names=val_imdb.classes)
                    plt.figure()
                    plt.imshow(im2show)
                    plt.show()

            #if img_id > 10:
            #    break

    print(results)
    results_file = os.path.join(args.output_dir, 'detections.json')
    with open(results_file, 'w') as f:
        json.dump(results,
                  f,
                  ensure_ascii=False,
                  indent=4,
                  sort_keys=True,
                  separators=(',', ': '))

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    val_imdb.evaluate_detections(all_boxes, output_dir=args.output_dir)
Exemplo n.º 6
0
def train():
    """Train a FasterRCNN detector on a VOC roidb.

    All hyper-parameters (lr schedule, momentum, weight/bias decay, gamma)
    come from ``parse_args()``.  Saves ``faster_rcnn101_epoch_{epoch}.pth``
    checkpoints to ``args.output_dir`` and optionally logs losses and FG/BG
    accuracies to tensorboardX.
    """

    args = parse_args()
    lr = args.lr
    decay_lrs = args.decay_lrs
    momentum = args.momentum
    weight_decay = args.weight_decay
    # NOTE: 'bais_decay' spelling mirrors the argparse flag name.
    bais_decay = args.bais_decay
    gamma = args.gamma

    cfg.USE_GPU_NMS = True if args.use_gpu else False

    # NOTE: `writer` only exists when args.use_tfboard is set; later uses
    # are guarded by the same flag.
    if args.use_tfboard:
        writer = SummaryWriter()

    # load data
    print('load data')
    if args.dataset == 'voc07trainval':
        dataset_name = 'voc_2007_trainval'
    elif args.dataset == 'voc12trainval':
        dataset_name = 'voc_2012_trainval'
    elif args.dataset == 'voc0712trainval':
        dataset_name = 'voc_2007_trainval+voc_2012_trainval'
    else:
        raise NotImplementedError

    imdb, roidb = combined_roidb(dataset_name)
    train_dataset = RoiDataset(roidb)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True)
    iter_per_epoch = int(len(train_dataset) / args.batch_size)

    # prepare model
    print('load model')
    model = FasterRCNN(backbone=args.backbone)
    # Per-parameter groups: bias terms get lr doubled when
    # cfg.TRAIN.DOUBLE_BIAS is truthy, and weight decay only when
    # bais_decay is truthy (the `and/or` chain picks weight_decay or 0).
    params = []
    for key, value in dict(model.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{
                    'params': [value],
                    'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1),
                    'weight_decay': bais_decay and weight_decay or 0
                }]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': weight_decay
                }]

    if args.use_gpu:
        model = model.cuda()

    model.train()

    # define optimizer
    optimizer = SGD(params, momentum=momentum)

    # training
    print('start training...')
    for epoch in range(args.epochs):
        start_time = time.time()
        train_data_iter = iter(train_dataloader)
        temp_loss = 0
        # Running FG/BG classification counters, reset each display window.
        rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
        faster_rcnn_tp, faster_rcnn_tn, faster_rcnn_fg, faster_rcnn_bg = 0, 0, 0, 0

        # Step-wise lr decay at the configured epochs.
        if epoch in decay_lrs:
            lr = lr * gamma
            adjust_lr(optimizer, lr)
            print('adjusting learning rate to {}'.format(lr))

        for step in range(iter_per_epoch):
            im_data, gt_boxes, im_info = next(train_data_iter)

            if args.use_gpu:
                im_data = im_data.cuda()
                gt_boxes = gt_boxes.cuda()
                im_info = im_info.cuda()

            im_data_variable = Variable(im_data)

            outputs = model(im_data_variable, gt_boxes, im_info)
            rois, _, _, faster_rcnn_cls_loss, faster_rcnn_reg_loss, \
            rpn_cls_loss, rpn_reg_loss, _train_info = outputs

            # Total loss: RPN + RCNN classification and regression terms.
            loss = faster_rcnn_cls_loss.mean() + faster_rcnn_reg_loss.mean() + \
                   rpn_cls_loss.mean() + rpn_reg_loss.mean()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            temp_loss += loss.item()

            if cfg.VERBOSE:
                rpn_tp += _train_info['rpn_tp']
                rpn_tn += _train_info['rpn_tn']
                rpn_fg += _train_info['rpn_num_fg']
                rpn_bg += _train_info['rpn_num_bg']
                faster_rcnn_tp += _train_info['faster_rcnn_tp']
                faster_rcnn_tn += _train_info['faster_rcnn_tn']
                faster_rcnn_fg += _train_info['faster_rcnn_num_fg']
                faster_rcnn_bg += _train_info['faster_rcnn_num_bg']

            if (step + 1) % args.display_interval == 0:
                end_time = time.time()
                # Averaged over the display window; the per-component values
                # below are from the last step only.
                temp_loss /= args.display_interval
                rpn_cls_loss_m = rpn_cls_loss.mean().item()
                rpn_reg_loss_m = rpn_reg_loss.mean().item()
                faster_rcnn_cls_loss_m = faster_rcnn_cls_loss.mean().item()
                faster_rcnn_reg_loss_m = faster_rcnn_reg_loss.mean().item()

                print('[epoch %2d][step %4d/%4d] loss: %.4f, time_cost: %.1f' %
                      (epoch, step + 1, iter_per_epoch, temp_loss,
                       end_time - start_time))
                print(
                    'loss: rpn_cls_loss_m: %.4f, rpn_reg_loss_m: %.4f, faster_rcnn_cls_loss_m: %.4f, faster_rcnn_reg_loss_m: %.4f'
                    % (rpn_cls_loss_m, rpn_reg_loss_m, faster_rcnn_cls_loss_m,
                       faster_rcnn_reg_loss_m))

                if args.use_tfboard:
                    # Epochs start at 0 here, so n_iter counts from step 1.
                    n_iter = epoch * iter_per_epoch + step + 1
                    writer.add_scalar('losses/loss', temp_loss, n_iter)
                    writer.add_scalar('losses/rpn_cls_loss_m', rpn_cls_loss_m,
                                      n_iter)
                    writer.add_scalar('losses/rpn_reg_loss_m', rpn_reg_loss_m,
                                      n_iter)
                    writer.add_scalar('losses/faster_rcnn_cls_loss_m',
                                      faster_rcnn_cls_loss_m, n_iter)
                    writer.add_scalar('losses/faster_rcnn_reg_loss_m',
                                      faster_rcnn_reg_loss_m, n_iter)

                    if cfg.VERBOSE:
                        # NOTE(review): divides by the fg/bg counts — would
                        # raise ZeroDivisionError if a window had none;
                        # confirm that cannot occur.
                        writer.add_scalar('rpn/fg_acc',
                                          float(rpn_tp) / rpn_fg, n_iter)
                        writer.add_scalar('rpn/bg_acc',
                                          float(rpn_tn) / rpn_bg, n_iter)
                        writer.add_scalar(
                            'rcnn/fg_acc',
                            float(faster_rcnn_tp) / faster_rcnn_fg, n_iter)
                        writer.add_scalar(
                            'rcnn/bg_acc',
                            float(faster_rcnn_tn) / faster_rcnn_bg, n_iter)

                # Reset window accumulators and timer.
                temp_loss = 0
                rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
                faster_rcnn_tp, faster_rcnn_tn, faster_rcnn_fg, faster_rcnn_bg = 0, 0, 0, 0
                start_time = time.time()

        if not os.path.exists(args.output_dir):
            os.mkdir(args.output_dir)

        if epoch % args.save_interval == 0:
            save_name = os.path.join(
                args.output_dir, 'faster_rcnn101_epoch_{}.pth'.format(epoch))
            torch.save({
                'model': model.state_dict(),
                'epoch': epoch,
                'lr': lr
            }, save_name)
Exemplo n.º 7
0
def test():
    """Evaluate a Yolov2 checkpoint on a VOC imdb.

    Writes the ``all_boxes`` detections to ``detections.pkl`` in
    ``args.output_dir`` and runs the imdb's ``evaluate_detections``.
    With ``--vis`` each image's detections are displayed via matplotlib.
    """
    args = parse_args()
    # Low confidence threshold for mAP evaluation; higher when visualizing.
    args.conf_thresh = 0.005
    args.nms_thresh = 0.45
    if args.vis:
        args.conf_thresh = 0.5
    print('Called with args:')
    print(args)

    # prepare dataset

    # Map the user-facing dataset flag to a concrete imdb name.
    if args.dataset == 'voc07trainval':
        args.imdbval_name = 'voc_2007_trainval'

    elif args.dataset == 'voc07test':
        args.imdbval_name = 'voc_2007_test'

    else:
        raise NotImplementedError

    val_imdb = get_imdb(args.imdbval_name)

    val_dataset = RoiDataset(val_imdb, train=False)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=args.batch_size,
                                shuffle=False)

    # load model
    model = Yolov2()
    # weight_loader = WeightLoader()
    # weight_loader.load(model, 'yolo-voc.weights')
    # print('loaded')

    model_path = os.path.join(args.output_dir, args.model_name + '.pth')
    print('loading model from {}'.format(model_path))
    # Fall back to CPU map_location when no GPU is available.
    if torch.cuda.is_available():
        checkpoint = torch.load(model_path)
    else:
        checkpoint = torch.load(model_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])

    if args.use_cuda:
        model.cuda()

    model.eval()
    print('model loaded')

    dataset_size = len(val_imdb.image_index)

    # all_boxes[cls][img] holds an (N, 5) array of [x1, y1, x2, y2, score]
    # for class `cls` in image `img` (empty list when no detections).
    all_boxes = [[[] for _ in range(dataset_size)]
                 for _ in range(val_imdb.num_classes)]

    det_file = os.path.join(args.output_dir, 'detections.pkl')

    img_id = -1
    with torch.no_grad():
        for batch, (im_data, im_infos) in enumerate(val_dataloader):
            if args.use_cuda:
                im_data_variable = Variable(im_data).cuda()
            else:
                im_data_variable = Variable(im_data)

            yolo_outputs = model(im_data_variable)
            # Post-process each image of the batch separately.
            for i in range(im_data.size(0)):
                img_id += 1
                output = [item[i].data for item in yolo_outputs]
                im_info = {'width': im_infos[i][0], 'height': im_infos[i][1]}
                detections = yolo_eval(output,
                                       im_info,
                                       conf_threshold=args.conf_thresh,
                                       nms_threshold=args.nms_thresh)
                print('im detect [{}/{}]'.format(img_id + 1, len(val_dataset)))
                if len(detections) > 0:
                    # Scatter detections into all_boxes by predicted class;
                    # final score = objectness (col 4) * class prob (col 5).
                    for cls in range(val_imdb.num_classes):
                        inds = torch.nonzero(detections[:, -1] == cls).view(-1)
                        if inds.numel() > 0:
                            cls_det = torch.zeros((inds.numel(), 5))
                            cls_det[:, :4] = detections[inds, :4]
                            cls_det[:, 4] = detections[inds,
                                                       4] * detections[inds, 5]
                            all_boxes[cls][img_id] = cls_det.cpu().numpy()

                if args.vis:
                    img = Image.open(val_imdb.image_path_at(img_id))
                    if len(detections) == 0:
                        continue
                    det_boxes = detections[:, :5].cpu().numpy()
                    det_classes = detections[:, -1].long().cpu().numpy()
                    im2show = draw_detection_boxes(
                        img,
                        det_boxes,
                        det_classes,
                        class_names=val_imdb.classes)
                    plt.figure()
                    plt.imshow(im2show)
                    plt.show()

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    val_imdb.evaluate_detections(all_boxes, output_dir=args.output_dir)
Exemplo n.º 8
0
def get_dataset(datasetnames):
    """Wrap the imdb identified by *datasetnames* in a RoiDataset."""
    imdb_name = datasetnames
    return RoiDataset(get_imdb(imdb_name))
Exemplo n.º 9
0
def test():
    """Evaluate a Faster R-CNN checkpoint on a VOC test split.

    Loads the model weights for `args.check_epoch`, runs detection over
    every test image, applies per-class NMS and an optional per-image
    detection cap, dumps all boxes to 'detections.pkl' in
    `args.output_dir`, and runs the imdb's evaluation routine.

    Raises:
        NotImplementedError: if `args.dataset` is not a supported split.
    """
    args = parse_args()

    # prepare data
    print('load data')
    if args.dataset == 'voc07test':
        dataset_name = 'voc_2007_test'
    elif args.dataset == 'voc12test':
        dataset_name = 'voc_2012_test'
    else:
        raise NotImplementedError

    cfg.TRAIN.USE_FLIPPED = False

    imdb, roidb = combined_roidb(dataset_name)
    test_dataset = RoiDataset(roidb)
    test_dataloader = DataLoader(dataset=test_dataset,
                                 batch_size=1,
                                 shuffle=False)
    test_data_iter = iter(test_dataloader)

    # load model
    model = FasterRCNN(backbone=args.backbone)
    model_name = '0712_faster_rcnn101_epoch_{}.pth'.format(args.check_epoch)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    model_path = os.path.join(args.output_dir, model_name)
    model.load_state_dict(torch.load(model_path)['model'])

    if args.use_gpu:
        model = model.cuda()

    model.eval()

    num_images = len(imdb.image_index)
    det_file_path = os.path.join(args.output_dir, 'detections.pkl')

    # all_boxes[cls][image] = N x 5 array [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))

    torch.set_grad_enabled(False)

    # BUG FIX: start the wall clock once, before the loop. The original
    # reset start_time on every image, so the final "test time" report
    # only covered the last image instead of the whole run.
    start_time = time.time()

    for i in range(num_images):
        im_data, gt_boxes, im_info = next(test_data_iter)
        if args.use_gpu:
            im_data = im_data.cuda()
            gt_boxes = gt_boxes.cuda()
            im_info = im_info.cuda()

        im_data_variable = Variable(im_data)

        det_tic = time.time()
        rois, faster_rcnn_cls_prob, faster_rcnn_reg, _, _, _, _, _ = model(
            im_data_variable, gt_boxes, im_info)

        scores = faster_rcnn_cls_prob.data
        boxes = rois.data[:, 1:]
        boxes_deltas = faster_rcnn_reg.data

        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Un-normalize the regression deltas. BUG FIX: build the
            # std/mean tensors on the same device as the deltas — the
            # original hard-coded .cuda(), which broke CPU-only runs.
            stds = torch.FloatTensor(
                cfg.TRAIN.BBOX_NORMALIZE_STDS).to(boxes_deltas.device)
            means = torch.FloatTensor(
                cfg.TRAIN.BBOX_NORMALIZE_MEANS).to(boxes_deltas.device)
            boxes_deltas = boxes_deltas.view(-1, 4) * stds + means
            boxes_deltas = boxes_deltas.view(-1, 4 * imdb.num_classes)

        pred_boxes = bbox_transform_inv_cls(boxes, boxes_deltas)

        pred_boxes = clip_boxes_cls(pred_boxes, im_info[0])

        # undo the test-time image rescaling so boxes are reported in
        # original-image coordinates
        pred_boxes /= im_info[0][2].item()

        det_toc = time.time()
        # BUG FIX: elapsed time is end - start (was start - end, negative).
        detect_time = det_toc - det_tic
        nms_tic = time.time()

        if args.vis:
            im_show = Image.open(imdb.image_path_at(i))

        # class 0 is background — skip it
        for j in range(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > args.thresh).view(-1)

            if inds.numel() > 0:
                cls_score = scores[:, j][inds]
                _, order = torch.sort(cls_score, 0, True)
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                cls_dets = torch.cat((cls_boxes, cls_score.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets, 0.3)
                cls_dets = cls_dets[keep.view(-1).long()]
                if args.vis:
                    cls_name_dets = np.repeat(j, cls_dets.size(0))
                    im_show = draw_detection_boxes(im_show,
                                                   cls_dets.cpu().numpy(),
                                                   cls_name_dets, imdb.classes,
                                                   0.5)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # keep only the highest-scoring max_per_image detections across
        # all classes for this image
        if args.max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > args.max_per_image:
                image_thresh = np.sort(image_scores)[-args.max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        if args.vis:
            plt.imshow(im_show)
            plt.show()
        nms_toc = time.time()
        # BUG FIX: elapsed time is end - start (was start - end, negative).
        nms_time = nms_toc - nms_tic
        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
                         .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

    with open(det_file_path, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, args.output_dir)

    end_time = time.time()
    print("test time: %0.4fs" % (end_time - start_time))
Exemplo n.º 10
0
def test():
    """Evaluate a YOLOv2 checkpoint on a VOC test split.

    Loads the weights for `args.check_epoch`, runs detection over the
    test set, collects per-class boxes (score = objectness * class
    probability), dumps everything to 'detections.pkl' in
    `args.output_dir`, and runs the imdb's evaluation routine.

    Raises:
        NotImplementedError: if `args.dataset` is not a supported split.
    """
    args = parse_args()

    if args.vis:
        # use a higher confidence threshold when visualizing so plots
        # are not cluttered with low-score boxes
        args.conf_thresh = 0.5

    # load test data
    if args.dataset == 'voc07test':
        dataset_name = 'voc_2007_test'
    elif args.dataset == 'voc12test':
        dataset_name = 'voc_2012_test'
    else:
        raise NotImplementedError

    test_imdb = get_imdb(dataset_name)
    test_dataset = RoiDataset(test_imdb, train=False)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=args.batch_size,
                                 num_workers=args.num_workers,
                                 shuffle=False)

    # load model
    model = YOLOv2()

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)
    weight_file_path = os.path.join(
        args.output_dir, 'yolov2_epoch_{}.pth'.format(args.check_epoch))

    # BUG FIX: is_available is a function; the bare attribute is always
    # truthy, so the CPU map_location fallback below was unreachable.
    if torch.cuda.is_available():
        state_dict = torch.load(weight_file_path)
    else:
        state_dict = torch.load(weight_file_path, map_location='cpu')

    model.load_state_dict(state_dict['model'])

    if args.use_cuda:
        model = model.cuda()

    model.eval()

    num_data = len(test_dataset)

    # all_boxes[cls][image] = N x 5 array [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in range(num_data)]
                 for _ in range(test_imdb.num_classes)]

    img_id = -1

    det_file = os.path.join(args.output_dir, 'detections.pkl')

    with torch.no_grad():
        # renamed loop variables: the original called the batch index
        # 'batch_size' and reused 'i' for both the in-batch image index
        # and the class index, shadowing the former.
        for _batch_idx, (im_data, im_infos) in enumerate(test_dataloader):

            if args.use_cuda:
                im_data = im_data.cuda()
                im_infos = im_infos.cuda()

            im_data_variable = Variable(im_data)

            outputs = model(im_data_variable)

            for b in range(im_data.size(0)):
                img_id += 1

                output = [item[b].data for item in outputs]
                im_info = im_infos[b]

                # NOTE(review): 'eval' here shadows the builtin — it is
                # presumably a detection post-processing helper imported
                # elsewhere in this file (cf. yolo_eval in the sibling
                # function); confirm and consider renaming the import.
                detections = eval(output, im_info, args.conf_thresh,
                                  args.nms_thresh)

                if len(detections) > 0:
                    for cls in range(cfg.CLASS_NUM):
                        idxs = torch.nonzero(detections[:, -1] == cls).view(-1)
                        if idxs.numel() > 0:
                            cls_det = torch.zeros((idxs.numel(), 5))
                            cls_det[:, :4] = detections[idxs, :4]
                            # final score = objectness * class probability
                            cls_det[:, 4] = (detections[idxs, 4]
                                             * detections[idxs, 5])
                            all_boxes[cls][img_id] = cls_det.cpu().numpy()

                if args.vis:
                    img = Image.open(test_imdb.image_path_at(img_id))
                    if len(detections) == 0:
                        continue
                    det_boxes = detections[:, :5].cpu().numpy()
                    det_classes = detections[:, -1].long().cpu().numpy()

                    imshow = draw_detection_boxes(
                        img,
                        det_boxes,
                        det_classes,
                        class_names=test_imdb.classes)

                    plt.figure()
                    plt.imshow(imshow)
                    plt.show()

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    test_imdb.evaluate_detections(all_boxes, output_dir=args.output_dir)