Пример #1
0
def train(model, train_loader, criterion, epoch, vis):
    """Run one training epoch and log/plot the mean batch loss.

    Args:
        model: network to train (put into train mode here).
        train_loader: yields dicts with 'image' and 'mask' batches.
        criterion: loss function applied to (output, target).
        epoch: epoch index, used only for logging.
        vis: visdom-style helper with an ``img`` method.

    Relies on module-level ``opt`` and ``optimizer``.
    """
    model.train()
    batch_loss = 0.0
    num_batches = 0
    for batch_idx, sample_batched in enumerate(train_loader):
        data = sample_batched['image']
        target = sample_batched['mask']
        data, target = Variable(data.type(opt.dtype)), Variable(
            target.type(opt.dtype))
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        # float(loss.data) works for both 1-element tensors (old PyTorch)
        # and 0-dim tensors (>=0.4); the original loss.data[0] raises
        # IndexError on 0-dim tensors in modern PyTorch.
        batch_loss += float(loss.data)
        num_batches += 1
        if (batch_idx + 1) % opt.plot_every == 0:
            ori_img_ = inverse_normalize(at.tonumpy(data[0]))
            target_ = at.tonumpy(target[0])
            pred_ = at.tonumpy(output[0])
            vis.img('gt_img', ori_img_)
            vis.img('gt_mask', target_)
            # threshold sigmoid-like output at 0.5 for display
            vis.img('pred_mask', (pred_ >= 0.5).astype(np.float32))

    # Guard against an empty loader (the original divided by an
    # undefined batch_idx and would raise NameError).
    batch_loss /= max(num_batches, 1)
    print('epoch: ' + str(epoch) + ', train loss: ' + str(batch_loss))
    with open('logs.txt', 'a') as file:
        file.write('epoch: ' + str(epoch) + ', train loss: ' +
                   str(batch_loss) + '\n')
    vis.plot('train loss', batch_loss)
def eval(dataloader, faster_rcnn, vis, test_num=10000):
    """Run detection over *dataloader*, plot each prediction, and return
    the VOC-2007-metric evaluation result dict.

    Stops after *test_num* batches (inclusive of the batch at that index).
    """
    pred_bboxes, pred_labels, pred_scores = [], [], []
    gt_bboxes, gt_labels, gt_difficults = [], [], []
    for idx, (imgs, sizes, gt_bboxes_, gt_labels_,
              gt_difficults_) in tqdm(enumerate(dataloader)):
        # original (H, W) of the image, unpacked from batched size tensors
        sizes = [sizes[0][0].item(), sizes[1][0].item()]
        boxes_, labels_, scores_ = faster_rcnn.predict(imgs, [sizes])
        img = imgs.cuda().float()
        denorm = inverse_normalize(at.tonumpy(img[0]))
        vis.img('test_pred_img',
                visdom_bbox(denorm,
                            at.tonumpy(boxes_[0]),
                            at.tonumpy(labels_[0]).reshape(-1),
                            at.tonumpy(scores_[0])))
        gt_bboxes.extend(list(gt_bboxes_.numpy()))
        gt_labels.extend(list(gt_labels_.numpy()))
        gt_difficults.extend(list(gt_difficults_.numpy()))
        pred_bboxes.extend(boxes_)
        pred_labels.extend(labels_)
        pred_scores.extend(scores_)
        if idx == test_num:
            break

    return eval_detection_voc(pred_bboxes,
                              pred_labels,
                              pred_scores,
                              gt_bboxes,
                              gt_labels,
                              gt_difficults,
                              use_07_metric=True)
Пример #3
0
def train(**kwargs):
    """Train a VGG16-backed Faster R-CNN without a visualization backend.

    Keyword args are forwarded to ``opt._parse`` as config overrides.
    NOTE(review): the snippet ends right after computing ``pred_img``
    without using or returning it — it looks truncated at the source.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    # img, bbox, label, scale = dataset[0]
    # The returned img has been rescaled and possibly randomly flipped.
    # The returned bbox is ordered as (ymin, xmin, ymax, xmax).
    #  H, W = size(im)
    # For an on-screen image, a, b, c, d mark the four corners:
    #        a   ...   b     ymin
    #        .         .
    #        c   ...   d     ymax  height H; y ranges over [0, H-1]
    #        xmin    xmax
    #        width W; x ranges over [0, W-1]

    print('load data')
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, \
                                  # pin_memory=True,

                                  num_workers=opt.num_workers)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')

    trainer = FasterRCNNTrainer(faster_rcnn)

    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    for epoch in range(opt.epoch):
        for ii, (img, bbox_, label_, scale) in (enumerate(dataloader)):
            print('step: ', ii)

            scale = at.scalar(scale)
            img, bbox, label = img.float(), bbox_, label_
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)

            if ((ii + 1) % opt.plot_every == 0) and (epoch > 50):
                # Only predict after many epochs; early predictions are
                # too poor to be worth plotting.
                #                if os.path.exists(opt.debug_file):
                #                    ipdb.set_trace()

                # plot groud truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                # gt_img is a numpy array in [0, 1], shaped 3 x H x W.
                # The annotated gt_img would be saved/displayed here.

                # plot predicti bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_, at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
Пример #4
0
def imgflip(img, bbox, x_flip=True, y_flip=True):
    """Flip the first image of *img* along the requested axes and return
    it denormalized, with the leading batch dimension restored.

    The array is CHW: *y_flip* reverses the height axis, *x_flip* the
    width axis. Note: *bbox* is accepted but not used.
    """
    arr = at.tonumpy(img[0])
    if y_flip:
        arr = arr[:, ::-1, :]
    if x_flip:
        arr = arr[:, :, ::-1]
    return inverse_normalize(arr[np.newaxis, ...])
Пример #5
0
def train(**kwargs):
    """Train Faster R-CNN, periodically dumping ground-truth and
    prediction images to disk via ``write_image``.

    Keyword args are forwarded to ``opt._parse`` as config overrides.
    No checkpoints are saved; training stops after epoch 13.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=True, \
                                  # pin_memory=True,

                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False, \
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    best_map = 0  # NOTE(review): never updated or used below
    lr_ = opt.lr  # NOTE(review): never used below
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)

            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # drop into ipdb when the debug sentinel file exists
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot groud truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                # ori_img_ = (at.tonumpy(img[0]))
                losses = trainer.get_meter_data()
                print(losses)
                write_image(ori_img_, at.tonumpy(bbox[0]), 'gt.png')
                # NOTE(review): predict presumably returns (bboxes, labels,
                # scores); _bboxes[0] is therefore the bbox list — confirm
                # write_image expects that shape.
                _bboxes = trainer.faster_rcnn.predict([ori_img_],
                                                      visualize=True)
                _bboxes = at.tonumpy(_bboxes[0])
                # plot predicted bboxes
                write_image(ori_img_, _bboxes, 'pred.png')
                print('saved an image')

        if epoch == 13:
            break
Пример #6
0
def train(individual, **kwargs):
    """Train a Faster R-CNN built from *individual*, evaluating each
    epoch and recording the best test mAP on ``individual.accuracy``.

    Keyword args are forwarded to ``opt._parse`` as config overrides.
    At epoch 9 the best checkpoint is reloaded and the learning rate
    decayed by ``opt.lr_decay``.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNN_mine(individual)
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    best_map = 0
    # BUGFIX: best_path was re-initialized to None inside the epoch loop,
    # so trainer.load(best_path) at epoch 9 loaded None unless that very
    # epoch happened to improve the mAP. Track it across epochs instead.
    best_path = None
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()

        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)

            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                ori_img_ = inverse_normalize(at.tonumpy(img[0]))

                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9 and best_path is not None:
            # reload the best checkpoint so far, then decay the LR
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        individual.accuracy = best_map
Пример #7
0
def eval_mAP(trainer, val_loader):
    """Return the mean per-image mAP (via ``map_iou``) over *val_loader*.

    Samples with 5 keys carry annotations; others get empty placeholder
    boxes/labels. Predicted and ground-truth boxes are rescaled back to
    the original image resolution before scoring.
    """
    tqdm.monitor_interval = 0
    mAP = []
    for ii, sample in tqdm(enumerate(val_loader)):
        if len(sample.keys()) == 5:
            # annotated sample: image plus bbox/label ground truth
            img_id, img, bbox, scale, label = sample['img_id'], sample['image'], sample['bbox'], sample['scale'], \
                                                sample['label']
            img, bbox, label = img.cuda().float(), bbox.cuda(), label.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)

        else:
            # unannotated sample: substitute empty numpy placeholders
            img_id, img, scale = sample['img_id'], sample['image'], sample[
                'scale']
            bbox = np.zeros((1, 0, 4))
            label = np.zeros((1, 0, 1))
            img = img.cuda().float()
            img = Variable(img)
        # if bbox is None:
        #     continue
        scale = at.scalar(scale)
        ori_img_ = inverse_normalize(at.tonumpy(img[0]))
        pred_boxes, pred_labels, pred_scores = trainer.faster_rcnn.predict(
            [ori_img_], visualize=True)
        pred_boxes = pred_boxes[0]
        pred_labels = pred_labels[0]
        pred_scores = pred_scores[0]
        bbox = at.tonumpy(bbox[0])
        # Rescale back
        C, H, W = ori_img_.shape
        # NOTE(review): the resized image is never used afterwards — only
        # (o_H, o_W) matter below; this resize looks like dead work.
        ori_img_ = transform.resize(ori_img_,
                                    (C, H * (1 / scale), W * (1 / scale)),
                                    mode='reflect')
        o_H, o_W = H * (1 / scale), W * (1 / scale)
        pred_boxes = resize_bbox(pred_boxes, (H, W), (o_H, o_W))
        bbox = resize_bbox(bbox, (H, W), (o_H, o_W))
        mAP.append(map_iou(bbox, pred_boxes, pred_scores))
        # if ii>=100:
        #     break

    mAP = np.array(mAP)
    # map_iou can return None; drop those entries before averaging
    # (presumably images where the score is undefined — TODO confirm).
    mAP = mAP[mAP != np.array(None)].astype(np.float32)

    return np.mean(mAP)
Пример #8
0
def run_test(model, test_loader):
    """Predict a binary mask for every test image.

    Returns (img_ids, images, pred_masks): the image ids, the
    denormalized input images, and the 0/1 float masks thresholded
    at 0.5.
    """
    pred_masks = []
    img_ids = []
    images = []
    for _, batch in tqdm(enumerate(test_loader)):
        data, batch_ids = batch['image'], batch['img_id']
        # volatile=True: legacy no-grad inference mode
        data = Variable(data.type(opt.dtype), volatile=True)
        output = at.tonumpy(model.forward(data))
        for i in range(output.shape[0]):
            mask = (np.squeeze(output[i]) >= 0.5).astype(np.float32)
            pred_masks.append(mask)
            img_ids.append(batch_ids[i])
            images.append(inverse_normalize(at.tonumpy(data[i])))

    return img_ids, images, pred_masks
Пример #9
0
def input_visual(imgs, boxes, labels):
    """Display *imgs* with its (ymin, xmin, ymax, xmax) boxes and VOC
    class names overlaid via matplotlib."""
    image = inverse_normalize(at.tonumpy(imgs.squeeze())) / 255
    plt.figure(figsize=(8, 8))
    plt.imshow(image.transpose(1, 2, 0))
    rects = boxes.reshape(-1, 4)
    widths = rects[:, 3] - rects[:, 1]
    heights = rects[:, 2] - rects[:, 0]
    names = labels.reshape(-1, len(rects))[0]
    for i in range(rects.shape[0]):
        # Rectangle wants (x, y) = (xmin, ymin), hence the [1, 0] swap.
        plt.gca().add_patch(
            Rectangle(rects[i][[1, 0]],
                      widths[i],
                      heights[i],
                      fill=False,
                      edgecolor='r'))
        plt.text(rects[i][1], rects[i][0],
                 dv.VOC_BBOX_LABEL_NAMES[names[i]])
    plt.axis("off")
    plt.show()
Пример #10
0
def train(**kwargs):
    """Train a VGG16-backed Faster R-CNN, evaluating on the test set each
    epoch and checkpointing whenever the test mAP improves.

    Keyword args are forwarded to ``opt._parse`` as config overrides.
    The best checkpoint is reloaded and the learning rate decayed at
    epoch 9; training stops after epoch 13.

    BUGFIX: the original DataLoader call put a comment after a trailing
    line-continuation backslash, which is a SyntaxError; the call is now
    plainly parenthesized. A large unreachable fragment (adversarial-
    attack code fused in from a different script, after the final
    ``break``) was removed.
    """
    opt._parse(kwargs)  # apply config overrides

    dataset = Dataset(opt)  # training dataset built from the config
    print('load data')
    # The code only supports batch_size=1.
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0  # best test mAP seen so far
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            # bbox_ is (ymin, xmin, ymax, xmax); label_ indexes
            # VOC_BBOX_LABEL_NAMES; batch_size is fixed at 1.
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot groud truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicti bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm',
                    at.totensor(trainer.roi_cm.conf, False).float())

        # evaluate once per epoch
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # reload the best checkpoint so far and decay the LR
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:
            break
Пример #12
0
def train_val():
    """Train Faster R-CNN with a train/val split, plotting to visdom and
    checkpointing on validation-mAP improvements.

    The learning rate is decayed (and the best checkpoint reloaded)
    every 10 epochs; an unconditional checkpoint is written after the
    final epoch.
    """
    print('load data')
    train_loader, val_loader = get_train_val_loader(
        opt.root_dir,
        batch_size=opt.batch_size,
        val_ratio=0.1,
        shuffle=opt.shuffle,
        num_workers=opt.num_workers,
        pin_memory=opt.pin_memory)
    faster_rcnn = FasterRCNNVGG16()
    # faster_rcnn = FasterRCNNResNet50()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    # if opt.load_path:
    #     trainer.load(opt.load_path)
    #     print('load pretrained model from %s' % opt.load_path)

    # trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        tqdm.monitor_interval = 0
        for ii, sample in tqdm(enumerate(train_loader)):
            if len(sample.keys()) == 5:
                # annotated sample: image plus bbox/label ground truth
                img_id, img, bbox, scale, label = sample['img_id'], sample['image'], sample['bbox'], sample['scale'], \
                                                    sample['label']
                img, bbox, label = img.cuda().float(), bbox.cuda(), label.cuda(
                )
                img, bbox, label = Variable(img), Variable(bbox), Variable(
                    label)

            else:
                # unannotated sample: substitute empty numpy placeholders
                img_id, img, bbox, scale, label = sample['img_id'], sample['image'], np.zeros((1, 0, 4)), \
                                                  sample['scale'], np.zeros((1, 0, 1))
                img = img.cuda().float()
                img = Variable(img)

            # NOTE(review): for a torch tensor ``.size`` is a bound method,
            # so this comparison is always False on the annotated path; the
            # guard only fires for the numpy placeholder branch above.
            if bbox.size == 0:
                continue

            scale = at.scalar(scale)
            trainer.train_step(img_id, img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, img_id[0], at.tonumpy(bbox[0]),
                                     at.tonumpy(label[0]))

                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_, img_id[0],
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))

                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm',
                    at.totensor(trainer.roi_cm.conf, False).float())

        # per-epoch validation
        mAP = eval_mAP(trainer, val_loader)
        trainer.vis.plot('val_mAP', mAP)
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(mAP), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        if mAP > best_map:
            best_map = mAP
            best_path = trainer.save(best_map=best_map)
        if epoch == opt.epoch - 1:
            # NOTE(review): overwrites best_path with the final (not
            # necessarily best) checkpoint before the reload below.
            best_path = trainer.save()

        if (epoch + 1) % 10 == 0:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
Пример #13
0
def train(**kwargs):
    """Train Faster R-CNN from a custom loader, plotting progress to
    visdom and writing a checkpoint every 10th epoch (including 0)."""
    # opt._parse(kwargs)

    print('load data')
    dataloader = get_train_loader(opt.root_dir,
                                  batch_size=opt.batch_size,
                                  shuffle=opt.shuffle,
                                  num_workers=opt.num_workers,
                                  pin_memory=opt.pin_memory)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for step, sample in tqdm(enumerate(dataloader)):
            annotated = len(sample.keys()) == 5
            if annotated:
                img_id = sample['img_id']
                img = sample['image']
                bbox_ = sample['bbox']
                scale = sample['scale']
                label_ = sample['label']
                img = Variable(img.cuda().float())
                bbox = Variable(bbox_.cuda())
                label = Variable(label_.cuda())
            else:
                # no annotations: substitute empty numpy placeholders
                img_id = sample['img_id']
                img = Variable(sample['image'].cuda().float())
                scale = sample['scale']
                bbox = np.zeros((1, 0, 4))
                label = np.zeros((1, 0, 1))

            # if label.size == 0:
            #     continue

            scale = at.scalar(scale)
            trainer.train_step(img_id, img, bbox, label, scale)
            if (step + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # loss curves
                trainer.vis.plot_many(trainer.get_meter_data())

                # ground-truth boxes (bbox_/label_ are only bound on the
                # annotated path, mirroring the original control flow)
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # predicted boxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_, at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # confusion matrices
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                trainer.vis.img(
                    'roi_cm',
                    at.totensor(trainer.roi_cm.conf, False).float())

        if epoch % 10 == 0:
            best_path = trainer.save(best_map=best_map)
Пример #14
0
def cutmix_generate(img,
                    scale,
                    paste_scale,
                    paste_img,
                    attention_map,
                    bboxes,
                    labels,
                    paste_bboxes,
                    paste_labels,
                    info,
                    bbox_drop=True,
                    rescale=True,
                    device='cuda',
                    threshold=0.7,
                    overlap_threshold=0.7):
    """CutMix-style augmentation: paste attention-selected objects from
    *img* onto *paste_img*.

    Objects whose box is mostly covered by the salient attention mask are
    copied (optionally randomly rescaled) onto the target image; paste-
    image boxes that end up heavily occluded are dropped. Returns
    (res_img, new_bboxes, new_labels, scale, info); when no box survives,
    falls back to the unmodified paste image with info["use_cutmix"] = 0.

    BUGFIXES vs. original: the neighbor-box re-marking wrote to an empty
    slice (min/max results overwrote each other); the empty-result check
    called len() on None; np.int (removed from NumPy) replaced by int.
    """
    src_img = inverse_normalize(at.tonumpy(img.squeeze()))
    src_img = torch.from_numpy(src_img).to(device)
    target_img = inverse_normalize(at.tonumpy(paste_img.squeeze()))
    target_img = torch.from_numpy(target_img.squeeze().copy()).to(device)
    target_img_copy = target_img.clone()

    src_size = src_img.shape
    target_size = target_img.shape
    # Output canvas: element-wise max of the two image sizes.
    img_size = [
        target_size[1] if target_size[1] > src_size[1] else src_size[1],
        target_size[2] if target_size[2] > src_size[2] else src_size[2]
    ]
    # NOTE(review): the second clause compares against src_size[2], not
    # img_size[1]; possibly a typo in the original — left unchanged.
    if target_size[1] >= img_size[1] and target_size[2] >= src_size[2]:
        new_scale = paste_scale
    else:
        new_scale = scale

    attention_map = torch.nn.functional.interpolate(
        attention_map.unsqueeze(0).unsqueeze(0), img_size).squeeze()
    mask = torch.zeros_like(attention_map).to(device)
    mean_threshold = attention_map.mean()
    mask[attention_map > mean_threshold] = 1
    mask[attention_map <= mean_threshold] = 0  # mask marks salient regions

    mask = mask.int()

    # Scale factors from source / paste coordinates to the output canvas.
    x_scale = img_size[0] / src_size[1]
    y_scale = img_size[1] / src_size[2]

    x_scale_p = img_size[0] / target_size[1]
    y_scale_p = img_size[1] / target_size[2]

    h, w = img_size

    src_img = torch.nn.functional.interpolate(src_img.unsqueeze(0),
                                              img_size).squeeze()

    target_img = torch.nn.functional.interpolate(target_img.unsqueeze(0),
                                                 img_size).squeeze()

    if len(bboxes.shape) == 2 and bboxes.shape[1] == 4:
        pass
    else:
        bboxes = bboxes.reshape([-1, 4])
    bboxes = bboxes.int()
    labels = labels.reshape(-1, )
    ori_mask = torch.zeros(src_img.shape[1:]).to(device)
    new_bboxes = []
    new_labels = []
    paste_bboxes_cp = paste_bboxes.copy()
    bboxes_cp = bboxes.clone().detach()
    res_img = target_img.clone()

    # Process source boxes from smallest to largest area.
    b_h = (bboxes[:, 3] - bboxes[:, 1]).unsqueeze(dim=1)
    b_w = (bboxes[:, 2] - bboxes[:, 0]).unsqueeze(dim=1)
    b = torch.cat([b_h, b_w], axis=1)
    areas = torch.prod(b, 1)

    _, index = torch.sort(areas)
    for i in index:
        bboxes_cp[i, [0, 2]] = (bboxes[i, [0, 2]] * x_scale).int()
        bboxes_cp[i, [1, 3]] = (bboxes[i, [1, 3]] * y_scale).int()
        ymin, xmin, ymax, xmax = bboxes_cp[i].int()
        x0 = ymin.item()
        x1 = ymax.item()
        y0 = xmin.item()
        y1 = xmax.item()
        ori_mask[x0:x1, y0:y1] = 1
        ori_mask = ori_mask.int()
        area = (x1 - x0) * (y1 - y0)

        # Fraction of the box covered by salient attention.
        portion = torch.sum(mask[x0:x1, y0:y1] & ori_mask[x0:x1, y0:y1]) / area

        if bbox_drop:
            remain_flag = True if np.random.rand(1) > 0.2 else False
        else:
            remain_flag = True
        if rescale:
            rescale_flag = True if np.random.rand(1) > 0.0 else False
        else:
            rescale_flag = False
        if rescale_flag:
            # NOTE(review): randn (normal) compared against 0.5 — rand
            # (uniform) was probably intended; left unchanged.
            if np.random.randn(1) >= 0.5:
                rescale_conf = np.random.randint(low=10, high=1000,
                                                 size=1)[0] * 0.0001 + 1
            else:
                rescale_conf = np.random.rand(1)[0]

        if portion < threshold or not remain_flag or area / h / w > 0.8:  # skip boxes covering most of the image

            mask[x0:x1, y0:y1] = 0
            for j in range(len(bboxes_cp)):
                # BUGFIX: the original overwrote x/y with both the max and
                # the min, leaving mask[x:x, y:y] an empty (no-op) slice;
                # use the intersection corners instead.
                xa = np.maximum(x0, bboxes_cp[j][0].detach().cpu().numpy())
                ya = np.maximum(y0, bboxes_cp[j][1].detach().cpu().numpy())
                xb = np.minimum(x1, bboxes_cp[j][2].detach().cpu().numpy())
                yb = np.minimum(y1, bboxes_cp[j][3].detach().cpu().numpy())
                mask[xa:xb, ya:yb] = 1
        else:

            # Tight extent of the salient pixels inside the box.
            index_y = torch.where(
                mask[ymin:ymax, xmin:xmax] == 1)[0].unique().sort()[0] + ymin
            index_x = torch.where(mask[ymin:ymax, xmin:xmax] == 1)[1].unique(
            ).sort()[0] + xmin  # keep the bbox close to the content edges
            mask_m = (mask[ymin:ymax, xmin:xmax]
                      & ori_mask[ymin:ymax, xmin:xmax]).bool()

            mask_m = torch.stack([mask_m, mask_m, mask_m], dim=0)
            sub_obj = src_img[:, ymin:ymax, xmin:xmax]  # extracted object

            if len(index_y) < 2 or len(index_x) < 2:
                print('!', index_y, type(index_y))

            if rescale_flag and 0 < ymin * rescale_conf < h and 0 < ymax * rescale_conf < h and 0 < xmin * rescale_conf < w and 0 < xmax * rescale_conf < w and (
                    ymax - ymin) * rescale_conf * (xmax -
                                                   xmin) * rescale_conf > 400:

                index_y = (index_y * rescale_conf).int()
                index_x = (index_x * rescale_conf).int()
                bbox_w = xmax.item() - xmin.item()
                bbox_h = ymax.item() - ymin.item()
                ymin = int(ymin * rescale_conf)
                ymax = int(ymax * rescale_conf)
                xmin = int(xmin * rescale_conf)
                xmax = int(xmax * rescale_conf)

                resize_f = tvtsf.Resize(
                    [int(bbox_h * rescale_conf),
                     int(bbox_w * rescale_conf)])
                r_mask_m = resize_f(mask_m).bool()

                res_img[:, ymin:ymin + int(bbox_h * rescale_conf), xmin:xmin +
                        int(bbox_w * rescale_conf)][r_mask_m] = resize_f(
                            sub_obj)[r_mask_m]

            else:
                res_img[:, ymin:ymax, xmin:xmax][mask_m] = sub_obj[mask_m]

            new_bboxes.append([
                index_y[0].item(), index_x[0].item(), index_y[-1].item(),
                index_x[-1].item()
            ])
            new_labels.append(labels[i])

    new_bboxes_cp = np.concatenate([new_bboxes])
    mask = mask & ori_mask
    for i in range(paste_bboxes.shape[0]):
        # BUGFIX: np.int was removed from NumPy; plain int is the same dtype.
        paste_bboxes_cp[i, [0, 2]] = (paste_bboxes[i, [0, 2]] *
                                      x_scale_p).astype(int)
        paste_bboxes_cp[i, [1, 3]] = (paste_bboxes[i, [1, 3]] *
                                      y_scale_p).astype(int)
        if len(new_bboxes_cp) > 0:
            ymin, xmin, ymax, xmax = paste_bboxes_cp[i]
            area = (ymax - ymin) * (xmax - xmin)
            overlap = torch.sum(mask[int(ymin):int(ymax),
                                     int(xmin):int(xmax)] == 1) / area

            # drop paste boxes heavily occluded by pasted objects
            if (overlap > overlap_threshold).any():
                continue
        new_bboxes.append(paste_bboxes_cp[i])
        new_labels.append(paste_labels[i])

    if len(new_bboxes) == 0:
        new_bboxes = None
    else:
        new_bboxes = torch.from_numpy(np.concatenate(
            [new_bboxes])).float().to(device).unsqueeze(dim=0)
        new_labels = torch.Tensor(new_labels).int().to(device).unsqueeze(dim=0)

    mask = mask.int()
    new_mask = mask
    # fraction of the originally-marked area that survived
    area = torch.sum(new_mask == 1).float().item() / (
        torch.sum(ori_mask == 1) + 1e-8).float().item()

    res_img = res_img / 255
    res_img = res_img.float()

    # BUGFIX: new_bboxes is None (not an empty list) when nothing
    # survived; the original's len(new_bboxes) raised TypeError here.
    if new_bboxes is None:
        info["use_cutmix"] = 0
        res_img = pytorch_normalze(target_img_copy / 255).unsqueeze(dim=0)
        return res_img, torch.from_numpy(paste_bboxes), torch.from_numpy(
            paste_labels), paste_scale, info

    else:
        info["use_cutmix"] = 1
        res_img = pytorch_normalze(res_img).unsqueeze(dim=0)
        return res_img, new_bboxes, new_labels, new_scale, info
Пример #15
0
def train(opt, faster_rcnn, dataloader,  val_dataloader,
          test_dataloader, trainer, lr_, best_map, start_epoch):
    """Train a Faster R-CNN model with periodic validation and checkpointing.

    Args:
        opt: config object (reads ``epoch``, ``debug_file``, ``lr_decay``).
        faster_rcnn: the detector network wrapped by ``trainer``.
        dataloader / val_dataloader / test_dataloader: torch DataLoaders
            yielding ``(img, bbox, label, scale)`` batches.
        trainer: helper exposing ``train_step``, ``save``, ``load``, meters
            and a visdom wrapper ``vis``.
        lr_: current learning rate (logging only).
        best_map: best test mAP seen so far.
        start_epoch: epoch index to resume from.
    """
    trainer.train()
    # fix: best_path was only assigned when an epoch improved best_map; if no
    # epoch ever did, the epoch-9 reload raised NameError. Initialize + guard.
    best_path = None
    for epoch in range(start_epoch, start_epoch + opt.epoch):
        trainer.reset_meters()
        pbar = tqdm(enumerate(dataloader), total=len(dataloader))
        for ii, (img, bbox_, label_, scale) in pbar:
            # Boxes are (y_min, x_min, y_max, x_max).
            scale = at.scalar(scale)

            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            losses = trainer.train_step(img, bbox, label, scale)
            if ii % 100 == 0:
                # Surface the individual loss terms on the progress bar.
                rpnloc = losses[0].cpu().data.numpy()
                rpncls = losses[1].cpu().data.numpy()
                roiloc = losses[2].cpu().data.numpy()
                roicls = losses[3].cpu().data.numpy()
                tot = losses[4].cpu().data.numpy()
                pbar.set_description(f"Epoch: {epoch} | Batch: {ii} | RPNLoc Loss: {rpnloc:.4f} | RPNclc Loss: {rpncls:.4f} | ROIloc Loss: {roiloc:.4f} | ROIclc Loss: {roicls:.4f} | Total Loss: {tot:.4f}")

            if (ii + 1) % 1000 == 0:
                # Mid-epoch validation on a larger subset of the val set.
                eval_result = eval(val_dataloader, faster_rcnn, test_num=1000)
                trainer.vis.plot('val_map', eval_result['map'])
                lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
                val_log_info = 'lr:{}, map:{},loss:{}'.format(
                    str(lr_), str(eval_result['map']),
                    str(trainer.get_meter_data()))
                trainer.vis.log(val_log_info)
                print("Evaluation Results on Val Set ")
                print(val_log_info)
                print("\n\n")

            if (ii + 1) % 100 == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                print(trainer.get_meter_data())
                try:
                    # Ground-truth boxes on the de-normalized image.
                    ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                    gt_img = visdom_bbox(ori_img_,
                                         at.tonumpy(bbox_[0]),
                                         at.tonumpy(label_[0]))
                    trainer.vis.img('gt_img', gt_img)
                    plt.show()

                    # Predicted boxes on the same image.
                    _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                    pred_img = visdom_bbox(ori_img_,
                                           at.tonumpy(_bboxes[0]),
                                           at.tonumpy(_labels[0]).reshape(-1),
                                           at.tonumpy(_scores[0]))
                    plt.show()
                    trainer.vis.img('pred_img', pred_img)

                    # rpn confusion matrix (meter)
                    trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                     win='rpn_cm')
                    # roi confusion matrix
                    trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf,
                                                          False).float())
                except Exception:
                    # fix: was a bare `except:` — visualization stays
                    # best-effort, but no longer swallows SystemExit etc.
                    print("Cannot display images")

            if (ii + 1) % 100 == 0:
                # Quick validation on a tiny subset for the val_map curve.
                eval_result = eval(val_dataloader, faster_rcnn, test_num=25)
                trainer.vis.plot('val_map', eval_result['map'])
                log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_), str(
                    eval_result['map']), str(trainer.get_meter_data()))
                trainer.vis.log(log_info)

        # Save after every epoch
        epoch_path = trainer.save(epoch, best_map=0)

        eval_result = eval(test_dataloader, faster_rcnn, test_num=1000)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        test_log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))

        trainer.vis.log(test_log_info)
        print("Evaluation Results on Test Set ")
        print(test_log_info)
        print("\n\n")

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = epoch_path

        if epoch == 9:
            # Reload the best checkpoint (if any) before decaying the LR.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:
            break
# --- Example #16 ---
def train():
    """Train the VGG16 head detector end-to-end.

    Builds train/val/test annotation lists per phase, optionally spot-checks a
    random sample, then runs the training loop with per-epoch evaluation.

    NOTE(review): relies on module-level globals (``phases``, ``dataset_name``,
    ``opt``, ``utils``, ``data_``, ``data_check_flag``, ...) — confirm they are
    defined in this module.
    """
    # Resolve the annotation-list path for each requested phase.
    for phase in phases:
        if phase == 'train':
            if dataset_name == 'hollywood':
                train_data_list_path = os.path.join(
                    opt.hollywood_dataset_root_path, 'hollywood_train.idl')
                train_data_list = utils.get_phase_data_list(
                    train_data_list_path, dataset_name)
            if dataset_name == 'brainwash':
                train_data_list_path = os.path.join(
                    opt.brainwash_dataset_root_path, 'brainwash_train.idl')
                train_data_list = utils.get_phase_data_list(
                    train_data_list_path, dataset_name)
        elif phase == 'val':
            if dataset_name == 'hollywood':
                val_data_list_path = os.path.join(
                    opt.hollywood_dataset_root_path, 'hollywood_val.idl')
                val_data_list = utils.get_phase_data_list(
                    val_data_list_path, dataset_name)
            if dataset_name == 'brainwash':
                val_data_list_path = os.path.join(
                    opt.brainwash_dataset_root_path, 'brainwash_val.idl')
                val_data_list = utils.get_phase_data_list(
                    val_data_list_path, dataset_name)
        elif phase == 'test':
            if dataset_name == 'hollywood':
                test_data_list_path = os.path.join(
                    opt.hollywood_dataset_root_path, 'hollywood_test.idl')
                test_data_list = utils.get_phase_data_list(
                    test_data_list_path, dataset_name)
            if dataset_name == 'brainwash':
                test_data_list_path = os.path.join(
                    opt.brainwash_dataset_root_path, 'brainwash_test.idl')
                test_data_list = utils.get_phase_data_list(
                    test_data_list_path, dataset_name)

    print("Number of images for training: %s" % (len(train_data_list)))
    print("Number of images for val: %s" % (len(val_data_list)))
    print("Number of images for test: %s" % (len(test_data_list)))

    if data_check_flag:
        # fix: random.randint(1, n) is inclusive at both ends, so index n was
        # out of range (IndexError) and index 0 was never sampled.
        # randrange(n) yields a valid index in [0, n).
        utils.check_loaded_data(train_data_list[random.randrange(
            len(train_data_list))])
        utils.check_loaded_data(val_data_list[random.randrange(
            len(val_data_list))])
        utils.check_loaded_data(test_data_list[random.randrange(
            len(test_data_list))])

    # Load the train dataset (the val list doubles as the "test" loader here).
    train_dataset = Dataset(train_data_list)
    test_dataset = Dataset(val_data_list)
    print("Load data.")

    train_dataloader = data_.DataLoader(train_dataset,
                                        batch_size=1,
                                        shuffle=True,
                                        num_workers=1)
    test_dataloader = data_.DataLoader(test_dataset,
                                       batch_size=1,
                                       shuffle=True,
                                       num_workers=1)
    # Initialize the head detector.
    head_detector_vgg16 = Head_Detector_VGG16(ratios=[1],
                                              anchor_scales=[8, 16])
    print("model construct completed")
    trainer = Head_Detector_Trainer(head_detector_vgg16).cuda()
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, scale) in enumerate(train_dataloader):
            scale = at.scalar(scale)
            img, bbox = img.cuda().float(), bbox_.cuda()
            img, bbox = Variable(img), Variable(bbox)
            _, _, _ = trainer.train_step(img, bbox, scale)
            print("Forward and backward pass done.")
            if (ii + 1) % opt.plot_every == 0:
                # Plot losses, ground-truth boxes and predicted ROIs in visdom.
                trainer.vis.plot_many(trainer.get_meter_data())
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, at.tonumpy(bbox_[0]))
                trainer.vis.img('gt_img', gt_img)
                rois, _ = trainer.head_detector.predict(img,
                                                        scale=scale,
                                                        mode='visualize')
                pred_img = visdom_bbox(ori_img_, at.tonumpy(rois))
                trainer.vis.img('pred_img', pred_img)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')

        avg_test_CorrLoc = eval(test_dataloader, head_detector_vgg16)

        print("Epoch {} of {}.".format(epoch + 1, opt.epoch))
        print("  test average corrLoc accuracy:\t\t{:.3f}".format(
            avg_test_CorrLoc))

        # Checkpoint keyed by the CorrLoc metric.
        model_save_path = trainer.save(best_map=avg_test_CorrLoc)

        if epoch == 8:
            # Reload the latest checkpoint and decay the learning rate.
            trainer.load(model_save_path)
            trainer.head_detector.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
# --- Example #17 ---
def train(**kwargs):
    """Train a Faster R-CNN detector; ``kwargs`` override the global config.

    Fixes relative to the original:
      * removed a stray ``pdb.set_trace()`` debug breakpoint;
      * restored the (commented-out) test loader and eval call — without them
        ``eval_result`` was referenced below while never defined (NameError);
      * guarded the epoch-9 ``trainer.load(best_path)`` against the case where
        no epoch ever improved the mAP.
    """
    # Parse command-line overrides into the config and derive data paths.
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)

    # Test loader, needed by the per-epoch evaluation below.
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False)

    faster_rcnn = FasterRCNN(7)
    print('model construct completed')
    trainer = Trainer(faster_rcnn).cuda()
    # Resume from a pretrained checkpoint when configured.
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    best_map = 0
    best_path = None  # set once an epoch improves best_map
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        print('epoch {}/{}'.format(epoch, opt.epoch))
        trainer.reset_meters()  # reset meters/confusion matrices each epoch
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # De-normalize the image and run prediction for inspection.
                ori_img_ = inverse_normalize(array_tool.tonumpy(img[0]))
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=False)

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)

        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{}, loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        print(log_info)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # Reload the best checkpoint (if any) and decay the LR by 10x.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:
            break
# --- Example #18 ---
def train(**kwargs):
    """Standard Faster R-CNN (VGG16) training loop with visdom monitoring.

    ``kwargs`` override entries of the global config ``opt``. Runs up to 14
    epochs, evaluates on the test set each epoch, keeps the best-mAP
    checkpoint, and decays the learning rate after epoch 9.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    # fix: best_path was unset until the first mAP improvement; epoch 9 then
    # raised NameError if no epoch had improved. Initialize and guard.
    best_path = None
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_, at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm',
                    at.totensor(trainer.roi_cm.conf, False).float())
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # Reload the best checkpoint (if any) before decaying the LR.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:
            break
# --- Example #19 ---
def train(**kwargs):
    """Train Faster R-CNN (VGG16 backbone) with per-epoch validation.

    ``kwargs`` override entries of the global config ``opt``.

    Fixes relative to the original:
      * lines inside the batch loop were indented with tabs while the rest of
        the function used spaces — a hard ``TabError`` under Python 3; the
        indentation is normalized to spaces;
      * Chinese comments translated to English;
      * guarded the epoch-9 ``trainer.load(best_path)`` against the case where
        no epoch ever improved the mAP (latent NameError).
    """
    # Parse command-line overrides into the config object.
    opt._parse(kwargs)
    # Training dataset and loader (data_ is the torch.utils.data alias).
    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    # Test dataset and loader.
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    # Build the detector and wrap it in a GPU trainer.
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    # Resume from a pretrained checkpoint when configured.
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    # Show class names in visdom.
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0      # best validation mAP seen so far
    best_path = None  # checkpoint path of the best model, if any
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        # Reset loss meters and confusion matrices at the start of each epoch.
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            # scale: ratio of input image size to the network input size.
            scale = at.scalar(scale)
            # Move the batch to the GPU.
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            # Wrap in Variables for autograd (legacy PyTorch API).
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            # Forward + backward + optimizer step.
            trainer.train_step(img, bbox, label, scale)
            # Periodic visualization.
            if (ii + 1) % opt.plot_every == 0:
                # Drop into the debugger when the debug flag file exists.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # Plot the five loss curves.
                trainer.vis.plot_many(trainer.get_meter_data())

                # De-normalize img[0] back to a displayable image and draw the
                # ground-truth boxes and labels on it.
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # Predict on the same image and draw boxes/labels/scores.
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # RPN confusion matrix (meter).
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # ROI confusion matrix.
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
        # Validate; eval returns a dict with keys 'ap' and 'map'.
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        # Keep the checkpoint of the best model so far.
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        # At epoch 9, reload the best checkpoint and decay the learning rate.
        if epoch == 9:
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
        # Log validation mAP and meter data to visdom.
        trainer.vis.plot('test_map', eval_result['map'])
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        if epoch == 13:
            break
# --- Example #20 ---
# Compatibility shim for PyTorch < 0.4: checkpoints saved by newer versions
# reference torch._utils._rebuild_tensor_v2, which older releases lack.
# Can be removed once PyTorch 0.4.x is out.
# See https://discuss.pytorch.org/t/question-about-rebuild-tensor-v2/14560
import torch._utils

if not hasattr(torch._utils, '_rebuild_tensor_v2'):
    def _rebuild_tensor_v2(storage, storage_offset, size, stride,
                           requires_grad, backward_hooks):
        """Rebuild a tensor from storage, then restore grad flag and hooks."""
        rebuilt = torch._utils._rebuild_tensor(storage, storage_offset,
                                               size, stride)
        rebuilt.requires_grad = requires_grad
        rebuilt._backward_hooks = backward_hooks
        return rebuilt

    torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2

#%%
# Visual sanity check: load a trained checkpoint and run prediction over the
# training set. No gradient updates happen in this cell.
faster_rcnn = FasterRCNNVGG16()
trainer = FasterRCNNTrainer(faster_rcnn).cuda()
trainer.load('./checkpoints/fasterrcnn_09031352_0')
opt.caffe_pretrain=True  # this model was trained from caffe-pretrained model
# Plot examples on training set
dataset = RSNADataset(opt.root_dir)
for i in range(0, len(dataset)):
    sample = dataset[i]
    img = sample['image']
    # Undo dataset normalization so the image is displayable.
    ori_img_ = inverse_normalize(at.tonumpy(img))

    # plot predicti bboxes
    _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
    # NOTE(review): pred_img is overwritten on every iteration and never shown
    # or saved here — presumably rendering happens elsewhere; confirm.
    pred_img = vis_bbox(ori_img_,
                           at.tonumpy(_bboxes[0]),
                           at.tonumpy(_labels[0]).reshape(-1),
                           at.tonumpy(_scores[0]))
def train(**kwargs):
    """Train Faster R-CNN (VGG16), track per-class AP, and time the run.

    ``kwargs`` override entries of the global config ``opt``. Each epoch
    evaluates on the test set, logs mAP/per-class AP to visdom, and saves a
    checkpoint whenever the mAP improves; the learning rate is decayed after
    epoch 9.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    # fix: best_path was unset until the first mAP improvement; epoch 9 then
    # raised NameError if no epoch had improved. Initialize and guard.
    best_path = None
    best_ap = np.array([0.] * opt.label_number)
    lr_ = opt.lr
    vis = trainer.vis
    starttime = datetime.datetime.now()  # wall-clock timing of the whole run
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_, at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                roi_cm = at.totensor(trainer.roi_cm.conf, False).float()
                trainer.vis.img('roi_cm', roi_cm)

        eval_result = eval(test_dataloader,
                           faster_rcnn,
                           vis=vis,
                           test_num=opt.test_num)
        # Map per-class APs to their label names for checkpoint metadata.
        best_ap = dict(zip(opt.VOC_BBOX_LABEL_NAMES, eval_result['ap']))
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if eval_result['map'] > best_map:
            print('roi_cm=\n', trainer.roi_cm.value())
            # NOTE(review): title says "Normalized" while normalize=False —
            # confirm which is intended.
            plot_confusion_matrix(trainer.roi_cm.value(),
                                  classes=('animal', 'plant', 'rock',
                                           'background'),
                                  normalize=False,
                                  title='Normalized Confusion Matrix')
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map, best_ap=best_ap)
        if epoch == 9:
            # Reload the best checkpoint (if any) before decaying the LR.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        # if epoch == 13:
        #     break
    endtime = datetime.datetime.now()
    train_consum = (endtime - starttime).seconds
    print("train_consum=", train_consum)
def train(**kwargs):
    """Adversarial training: Faster R-CNN victim with a DCGAN attacker.

    ``kwargs`` override entries of the global config ``opt``. Alongside the
    usual ground-truth/prediction plots, each plot interval also shows the
    detector's predictions on the attacker-perturbed image. Checkpoints are
    written every 500 plot-interval batches and every other epoch.
    """
    opt._parse(kwargs)
    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    attacker = attacks.DCGAN(train_adv=False)
    if opt.load_attacker:
        attacker.load(opt.load_attacker)
        print('load attacker model from %s' % opt.load_attacker)
    trainer = VictimFasterRCNNTrainer(faster_rcnn, attacker,
                                      attack_mode=True).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    for epoch in range(opt.epoch):
        trainer.reset_meters(adv=True)
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            # fix: removed an unconditional ipdb.set_trace() here — it dropped
            # into the debugger on every single batch, halting training.
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)

            if (ii) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss (clean and adversarial meters)
                trainer.vis.plot_many(trainer.get_meter_data())
                trainer.vis.plot_many(trainer.get_meter_data(adv=True))

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_, at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)
                if trainer.attacker is not None:
                    # Predictions on the adversarially perturbed image.
                    adv_img = trainer.attacker.perturb(img)
                    adv_img_ = inverse_normalize(at.tonumpy(adv_img[0]))
                    _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                        [adv_img_], visualize=True)
                    adv_pred_img = visdom_bbox(
                        adv_img_, at.tonumpy(_bboxes[0]),
                        at.tonumpy(_labels[0]).reshape(-1),
                        at.tonumpy(_scores[0]))
                    trainer.vis.img('adv_img', adv_pred_img)
                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm',
                    at.totensor(trainer.roi_cm.conf, False).float())

                if (ii) % 500 == 0:
                    best_path = trainer.save(epochs=epoch, save_rcnn=True)

        if epoch % 2 == 0:
            best_path = trainer.save(epochs=epoch)
# --- Example #23 ---
def train(**kwargs):
    """Train Faster R-CNN, optionally distilling from pre-computed teacher soft labels.

    Command-line overrides in ``kwargs`` are merged into the global ``opt``
    config. When ``opt.is_distillation`` is set, a first pass over the training
    set dumps the teacher's predictions (bboxes / labels / scores / features)
    to per-image .npy files; the main loop then reloads them per image and
    feeds them to ``trainer.train_step`` as distillation targets.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True
                                       )
    testset_all = TestDataset_all(opt, 'test2')
    test_all_dataloader = data_.DataLoader(testset_all,
                                           batch_size=1,
                                           num_workers=opt.test_num_workers,
                                           shuffle=False,
                                           pin_memory=True
                                           )

    tsf = Transform(opt.min_size, opt.max_size)
    faster_rcnn = FasterRCNNVGG16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    print('model construct completed')

    # Resume from a previously trained model; the path is set in config.
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    # Pass 1: extract the soft labels required for knowledge distillation.
    if opt.is_distillation:
        # NOTE(review): 'predict_socre' looks like a typo of 'predict_score',
        # but the same misspelled attribute may be read elsewhere in the
        # project — confirm globally before renaming it.
        opt.predict_socre = 0.3
        for ii, (imgs, sizes, gt_bboxes_, gt_labels_, scale, id_) in tqdm(enumerate(dataloader)):
            if len(gt_bboxes_) == 0:
                continue
            sizes = [sizes[0][0].item(), sizes[1][0].item()]
            pred_bboxes_, pred_labels_, pred_scores_, features_ = trainer.faster_rcnn.predict(imgs, [
                sizes])

            img_file = os.path.join(
                opt.voc_data_dir, 'JPEGImages', id_[0] + '.jpg')
            ori_img = read_image(img_file, color=True)
            # Re-apply the training transform so the teacher boxes live in the
            # same (resized) coordinate frame as the training images.
            img, pred_bboxes_, pred_labels_, scale_ = tsf(
                (ori_img, pred_bboxes_[0], pred_labels_[0]))

            # Drop soft labels that overlap the ground truth too heavily and
            # remove obviously wrong soft labels.
            pred_bboxes_, pred_labels_, pred_scores_ = py_cpu_nms(
                gt_bboxes_[0], gt_labels_[0], pred_bboxes_, pred_labels_, pred_scores_[0])

            # Cache the soft labels on disk so they do not occupy GPU memory.
            np.save('label/' + str(id_[0]) + '.npy', pred_labels_)
            np.save('bbox/' + str(id_[0]) + '.npy', pred_bboxes_)
            np.save('feature/' + str(id_[0]) + '.npy', features_)
            np.save('score/' + str(id_[0]) + '.npy', pred_scores_)

        opt.predict_socre = 0.05
    t.cuda.empty_cache()

    # Show all class label names in visdom.
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr

    for epoch in range(opt.epoch):
        print('epoch=%d' % epoch)

        # Reset the loss meters and confusion matrices.
        trainer.reset_meters()
        for ii, (img, sizes, bbox_, label_, scale, id_) in tqdm(enumerate(dataloader)):
            if len(bbox_) == 0:
                continue
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            if opt.is_distillation:
                # Reload this image's cached soft labels.
                teacher_pred_labels = np.load(
                    'label/' + str(id_[0]) + '.npy')
                teacher_pred_bboxes = np.load(
                    'bbox/' + str(id_[0]) + '.npy')
                teacher_pred_features_ = np.load(
                    'feature/' + str(id_[0]) + '.npy')
                teacher_pred_scores = np.load(
                    'score/' + str(id_[0]) + '.npy')
                # Normalize dtypes, then move everything to the GPU as tensors.
                teacher_pred_bboxes = teacher_pred_bboxes.astype(np.float32)
                teacher_pred_labels = teacher_pred_labels.astype(np.int32)
                teacher_pred_scores = teacher_pred_scores.astype(np.float32)
                teacher_pred_bboxes_ = at.totensor(teacher_pred_bboxes).cuda()
                teacher_pred_labels_ = at.totensor(teacher_pred_labels).cuda()
                teacher_pred_scores_ = at.totensor(teacher_pred_scores).cuda()
                teacher_pred_features_ = at.totensor(teacher_pred_features_).cuda()

                # If the dataset Transform randomly flipped this image, flip
                # the cached soft labels with it (detected by comparing one
                # box coordinate against the transformed ground truth).
                if(teacher_pred_bboxes_[0][1] != bbox[0][0][1]):
                    _, o_C, o_H, o_W = img.shape
                    teacher_pred_bboxes_ = flip_bbox(
                        teacher_pred_bboxes_, (o_H, o_W), x_flip=True)

                # NOTE(review): the raw numpy `teacher_pred_scores` (not the
                # cuda tensor `teacher_pred_scores_`) is passed here — confirm
                # train_step really expects a numpy array for this argument.
                losses = trainer.train_step(img, bbox, label, scale, epoch,
                                            teacher_pred_bboxes_, teacher_pred_labels_, teacher_pred_features_, teacher_pred_scores)
            else:
                trainer.train_step(img, bbox, label, scale, epoch)

            # Periodic visdom plotting.
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # BUG FIX: the teacher_pred_* tensors only exist in
                # distillation mode; plotting them unconditionally raised
                # NameError whenever opt.is_distillation was False.
                if opt.is_distillation:
                    gt_img = visdom_bbox(ori_img_,
                                         at.tonumpy(teacher_pred_bboxes_),
                                         at.tonumpy(teacher_pred_labels_),
                                         at.tonumpy(teacher_pred_scores_))
                    trainer.vis.img('gt_img_all', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores, _ = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(
                    str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.text(
                    str(trainer.roi_cm.value().tolist()), win='roi_cm')

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{},ap:{}, map:{},loss:{}'.format(str(lr_),
                                                        str(eval_result['ap']),
                                                        str(eval_result['map']),
                                                        str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        # Keep the best checkpoint and remember its path.
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)

        if epoch == 20:
            trainer.save(best_map='20')
            result = eval(test_all_dataloader,
                          trainer.faster_rcnn, test_num=5000)
            print('20result={}'.format(str(result)))
            break

        # Every 20 epochs (when epoch % 20 == 15) reload the best weights so
        # far and decay the learning rate.
        if epoch % 20 == 15:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
Пример #24
0
def train(**kwargs):
    """Train Faster R-CNN (VGG16 or ResNet-101 backbone); keep the best-mAP checkpoint.

    ``kwargs`` override fields of the global ``opt`` config. Each epoch runs
    the optimizer over the training set, periodically pushes loss curves and
    ground-truth / prediction visualizations to visdom, then evaluates mAP on
    the test set. At epoch 9 the best checkpoint is reloaded and the learning
    rate decayed by ``opt.lr_decay``.
    """
    opt.parse(kwargs)

    print('loading data...')

    trainset = TrainDataset(opt)
    train_dataloader = torch.utils.data.DataLoader(trainset,
                                                   batch_size=1,
                                                   shuffle=True,
                                                   num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = torch.utils.data.DataLoader(
        testset,
        batch_size=1,
        num_workers=opt.test_num_workers,
        shuffle=False,
        pin_memory=True)

    print('constructing model...')

    if opt.model == 'vgg16':
        faster_rcnn = FasterRCNNVGG16()
    elif opt.model == 'resnet101':
        faster_rcnn = FasterRCNNResNet101()
    else:
        # BUG FIX: an unrecognized opt.model used to leave `faster_rcnn`
        # unbound and crash two lines later with NameError; fail fast instead.
        raise ValueError('unsupported model: {}'.format(opt.model))

    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    print('loading model...')

    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    else:
        print('no pretrained model found')

    trainer.vis.text('<br/>'.join(trainset.db.label_names), win='labels')

    print('start training...')

    best_map = 0.0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        print("epoch : %d training ..." % epoch)
        trainer.reset_meters()
        for ii, (imgs_, bboxes_, labels_,
                 scales_) in tqdm(enumerate(train_dataloader)):
            scales = at.scalar(scales_)
            imgs, bboxes, labels = imgs_.cuda().float(), bboxes_.cuda(
            ), labels_.cuda()
            trainer.train_step(imgs, bboxes, labels, scales)

            if (ii + 1) % opt.plot_every == 0:

                # plot loss
                trainer.vis.plot_many(trainer.losses_data())

                # generate plotted image
                img = inverse_normalize(at.tonumpy(imgs_[0]))

                # plot ground truth bboxes
                bbox = at.tonumpy(bboxes_[0])
                label = at.tonumpy(labels_[0])
                img_gt = visdom_bbox(img, bbox, label)
                trainer.vis.img('ground truth', img_gt)

                bboxes__, labels__, scores__ = trainer.faster_rcnn.predict(
                    [img], visualize=True)

                # plot prediction bboxes
                bbox = at.tonumpy(bboxes__[0])
                label = at.tonumpy(labels__[0]).reshape(-1)
                score = at.tonumpy(scores__[0])
                img_pred = visdom_bbox(img, bbox, label, score)
                trainer.vis.img('prediction', img_pred)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')

                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm',
                    at.totensor(trainer.roi_cm.conf, False).float())

            # Cap the number of iterations per epoch.
            if ii + 1 == opt.train_num:
                break

        print("epoch : %d evaluating ..." % epoch)

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = vis_dict(
            {
                'epoch': '%s/%s' % (str(epoch), str(opt.epoch)),
                'lr': lr_,
                'map': float(eval_result['map']),
            }, trainer.losses_data())

        trainer.vis.log(log_info)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map="%.4f" % best_map)
        if epoch == 9:
            # NOTE(review): if mAP never exceeded 0.0 in epochs 0-9,
            # `best_path` is unbound here and this raises NameError — unlikely
            # in practice, but worth confirming.
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
Пример #25
0
def train(**kwargs):  # *args collects extra positional args (tuple); **kwargs collects keyword args (dict)
    """Train Faster R-CNN end-to-end; evaluate each epoch and save the best-mAP model."""
    opt._parse(kwargs)  # parse the keyword overrides (a dict) into the global config

    dataset = Dataset(opt)  # project-defined Dataset class
    print('读取数据中...')

    # The DataLoader defines how one batch of data is fetched per iteration.
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=True, \
                                  # pin_memory=True,

                                  num_workers=opt.num_workers) # PyTorch DataLoader: a multi-worker iterator over the dataset, yielding one batch at a time
    testset = TestDataset(opt, split='trainval')

    # loader for the test split
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False, \
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16()  # network definition
    print('模型构建完毕!')

    trainer = FasterRCNNTrainer(
        faster_rcnn).cuda()  # trainer computing the losses; .cuda() keeps its tensors on the GPU

    if opt.load_path:  # optionally resume from a pretrained model
        trainer.load(opt.load_path)
        print('已加载预训练参数 %s' % opt.load_path)
    else:
        print("未引入预训练参数, 随机初始化网络参数")

    trainer.vis.text(dataset.db.label_names, win='labels')  # show the label names in visdom
    best_map = 0  # best mAP observed so far

    for epoch in range(opt.epoch):  # for each epoch

        trainer.reset_meters()  # reset all meters (losses, confusion matrices)

        # for each training sample
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)  # convert to a plain scalar
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda(
            )  # move to the GPU
            img, bbox, label = Variable(img), Variable(bbox), Variable(
                label)  # wrap in Variables for autograd (legacy PyTorch API)
            # TODO
            trainer.train_step(img, bbox, label, scale)  # one optimization step

            if (ii + 1) % opt.plot_every == 0:  # every `plot_every` iterations
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_, at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm',
                    at.totensor(trainer.roi_cm.conf, False).float())

        # evaluate the model on the test split (runs prediction internally)
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(
                best_map=best_map)  # checkpoint improved models into the checkpoint folder

        if epoch == 9:  # at epoch 9, reload the best model and decay the learning rate
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)

        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        # if epoch == 13:  # stop training at the 14th epoch
        #     break

    trainer.save(best_map=best_map)
Пример #26
0
def train(**kwargs):
    """Train Faster R-CNN (VGG16); evaluate each epoch and keep the best-mAP checkpoint."""
    opt._parse(
        kwargs
    )  # interpret the call-time overrides via config.py's opt._parse(); the resulting data paths feed the Dataset below

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)

    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(
        testset,
        batch_size=1,
        num_workers=opt.test_num_workers,
        shuffle=False,
        #pin_memory=True
    )  # pin_memory would use page-locked host memory for faster GPU transfers

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    # If opt.load_path is set, resume from the pretrained model there, then
    # visualize the training label names.
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.dataset.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    # The number of epochs (opt.epoch, default 14) is a hyperparameter
    # predefined in config.py.
    for epoch in range(opt.epoch):
        print('epoch {}/{}'.format(epoch, opt.epoch))
        trainer.reset_meters()  # first reset all meters shown in the visualizer
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = array_tool.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot the losses
                trainer.vis.plot_many(trainer.get_meter_data())
                # plot the ground-truth bboxes
                ori_img_ = inverse_normalize(array_tool.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, array_tool.tonumpy(bbox_[0]),
                                     array_tool.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot the predicted bboxes
                # faster_rcnn.predict returns its results in the
                # underscore-prefixed names below
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(
                    ori_img_, array_tool.tonumpy(_bboxes[0]),
                    array_tool.tonumpy(_labels[0]).reshape(-1),
                    array_tool.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)
                # show the RPN confusion matrix in the visualizer
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # show the RoI head confusion matrix in the visualizer
                trainer.vis.img(
                    'roi_cm',
                    array_tool.totensor(trainer.roi_cm.conf, False).float())
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{}, loss{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)  # promptly refresh lr / mAP info in the visualizer

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:  # at epoch 9, reload the best weights and multiply the lr by lr_decay (0.1)
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:
            break
Пример #27
0
def train(**kwargs):
    """Train/evaluate Faster R-CNN with resumable epochs and periodic snapshots.

    Supports: resuming from a saved state (continues at old_state['epoch']+1),
    a validate-only mode that evaluates the whole test set and returns,
    stepped learning-rate decay per epoch, and snapshotting every
    ``opt.snapshot_every`` global steps.
    """
    opt._parse(kwargs)

    print('load data')
    dataset = Dataset(opt)
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=True, \
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False, \
                                       pin_memory=True
                                       )

    # Single anchor scale; class count comes from the dataset itself.
    faster_rcnn = FasterRCNNVGG16(n_fg_class=dataset.get_class_count(), anchor_scales=[1])
    print('model construct completed')

    trainer = FasterRCNNTrainer(faster_rcnn, n_fg_class=dataset.get_class_count())

    if opt.use_cuda:
        trainer = trainer.cuda()

    if opt.load_path:
        old_state = trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    # Validate-only mode: evaluate on the full test set and exit.
    if opt.validate_only:
        num_eval_images = len(testset)
        eval_result = eval(test_dataloader, faster_rcnn, test_num=num_eval_images)
        print('Evaluation finished, obtained {} using {} out of {} images'.
                format(eval_result, num_eval_images, len(testset)))
        return

    # Resume at the epoch after the one stored in the checkpoint, if any.
    if opt.load_path and 'epoch' in old_state.keys():
        starting_epoch = old_state['epoch'] + 1
        print('Model was trained until epoch {}, continuing with epoch {}'.format(old_state['epoch'], starting_epoch))
    else:
        starting_epoch = 0

    #trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    global_step = 0
    for epoch in range(starting_epoch, opt.num_epochs):
        # Stepped decay: lr is recomputed from scratch each epoch, so it is
        # consistent across resumed runs.
        lr_ = opt.lr * (opt.lr_decay ** (epoch // opt.epoch_decay))
        trainer.faster_rcnn.set_lr(lr_)

        print('Starting epoch {} with learning rate {}'.format(epoch, lr_))
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader), total=len(dataset)):
            global_step = global_step + 1
            scale = at.scalar(scale)
            if opt.use_cuda:
                img, bbox, label = img.cuda().float(), bbox_.float().cuda(), label_.float().cuda()
            else:
                img, bbox, label = img.float(), bbox_.float(), label_.float()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            losses = trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                #trainer.vis.plot_many(trainer.get_meter_data())

                # plot groud truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]),
                                     label_names=dataset.get_class_names()+['BG'])
                trainer.vis.img('gt_img', gt_img)

                # plot predicti bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]),
                                       label_names=dataset.get_class_names()+['BG'])
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix(meter)
                #trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                #trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

                #print('Current total loss {}'.format(losses[-1].tolist()))
                trainer.vis.plot('train_total_loss', losses[-1].tolist())

            # Snapshot by global step, independent of epoch boundaries.
            if (global_step) % opt.snapshot_every == 0:
                snapshot_path = trainer.save(epoch=epoch)
                print("Snapshotted to {}".format(snapshot_path))

        #snapshot_path = trainer.save(epoch=epoch)
        #print("After epoch {}: snapshotted to {}".format(epoch,snapshot_path))

        eval_result = eval(test_dataloader, faster_rcnn, test_num=min(opt.test_num, len(testset)))
        print(eval_result)
        # TODO: this definitely is not good and will bias evaluation
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=eval_result['map'],epoch=epoch)
            print("After epoch {}: snapshotted to {}".format(epoch, best_path))


        trainer.vis.plot('test_map', eval_result['map'])
def train(**kwargs):
    """Attack Faster R-CNN inputs with PGD and measure prediction robustness.

    Despite the name, no weight updates happen here (train_step is
    intentionally disabled below): each plotted image is perturbed with PGD,
    predictions on the clean and adversarial versions are rendered to
    imgs/orig_images and imgs/adv_images, and the fraction of images whose
    predicted label set is unchanged is reported along with total attack time.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=True, \
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False, \
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from {}'.format(opt.load_path))

    atk = None
    if opt.flagadvtrain:
        print("flagadvtrain turned: Adversarial training!")
        atk = PGD.PGD(trainer, eps=16/255, alpha=3/255, steps=4)

    total_time = 0.0
    total_imgs = 0   # images inspected at plot points
    true_imgs = 0    # images whose predicted labels survive the attack
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            # Keep an unperturbed copy for the clean-vs-adversarial comparison.
            temp_img = copy.deepcopy(img).cuda()
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()

            if opt.flagadvtrain:
                before_time = time.time()
                img = atk(img, bbox, label, scale)
                after_time = time.time()
                # BUG FIX: this accumulation used to sit outside the guard and
                # raised NameError whenever opt.flagadvtrain was False.
                total_time += after_time - before_time

            # NOTE: weight updates intentionally disabled for this experiment.
            # trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                temp_ori_img_ = inverse_normalize(at.tonumpy(temp_img[0]))
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))

                # Render predictions on the (possibly adversarial) image.
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)

                fig1 = plt.figure()
                ax1 = fig1.add_subplot(1, 1, 1)
                final1 = (ori_img_.transpose(1, 2, 0).astype(np.uint8))
                ax1.imshow(final1)
                visdom_bbox(ax1, at.tonumpy(_bboxes[0]), at.tonumpy(_labels[0]))
                fig1.savefig("imgs/adv_images/adv_img{}".format(ii))
                plt.close()

                # Render predictions on the clean copy.
                _temp_bboxes, _temp_labels, _temp_scores = trainer.faster_rcnn.predict([temp_ori_img_], visualize=True)

                fig2 = plt.figure()
                ax2 = fig2.add_subplot(1, 1, 1)
                final2 = (temp_ori_img_.transpose(1, 2, 0).astype(np.uint8))
                ax2.imshow(final2)
                visdom_bbox(ax2, at.tonumpy(_temp_bboxes[0]), at.tonumpy(_temp_labels[0]))
                fig2.savefig("imgs/orig_images/gt_img{}".format(ii))
                plt.close()

                total_imgs += 1
                if len(_temp_labels) == 0:
                    continue
                # BUG FIX: `(...).all() is True` compared a numpy.bool_ to the
                # bool singleton by identity, which is always False, so
                # true_imgs never incremented. Coerce with bool() instead.
                if _labels[0].shape[0] == _temp_labels[0].shape[0] and bool((_labels[0] == _temp_labels[0]).all()):
                    true_imgs += 1

        # Only the first epoch is needed for this robustness measurement.
        if epoch == 0:
            break

    print("Total number of images is {}".format(total_imgs))
    print("True images is {}".format(true_imgs))
    print("Total time is {}".format(total_time))
    # Guard: if plot_every exceeds the dataset size, no image is ever counted.
    if total_imgs > 0:
        print("Avg time is {}".format(total_time/total_imgs))
Пример #29
0
def train(**kwargs):
    """Train a Faster R-CNN (VGG16 backbone) and evaluate mAP every epoch.

    ``kwargs`` are forwarded to ``opt._parse`` to override config defaults.
    Progress (losses, ground-truth vs. predicted boxes, confusion matrices)
    is plotted to visdom every ``opt.plot_every`` batches. A checkpoint is
    saved whenever the validation mAP improves; at epoch 9 the best
    checkpoint is reloaded and the learning rate is decayed. Training stops
    after epoch 13.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    # fix: initialize so the epoch-9 reload below cannot raise
    # UnboundLocalError when mAP never improved in the first epochs
    best_path = None
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # drop into the debugger if the sentinel file exists
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss meters
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground-truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # RPN confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # RoI confusion matrix
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9 and best_path is not None:
            # reload the best checkpoint so far and decay the learning rate
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)

        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        if epoch == 13:
            break
Example #30
0
def train(**kwargs):
    """Train Faster R-CNN with TensorBoard logging (no visdom).

    ``kwargs`` are forwarded to ``opt._parse`` to override config defaults.
    Per-batch losses are written to a ``SummaryWriter``; per-epoch mAP is
    logged and the best checkpoint is saved. At epoch 9 the best checkpoint
    is reloaded and the learning rate is decayed. Training stops after
    epoch 13.
    """
    opt._parse(kwargs)
    log = SummaryWriter(log_dir=opt.log_dir)

    # training / validation data
    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True
                                       )

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    best_map = 0
    # fix: initialize so the epoch-9 reload below cannot raise
    # UnboundLocalError when mAP never improved in the first epochs
    best_path = None
    lr_ = opt.lr
    idx = 0  # global step counter for TensorBoard
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in enumerate(dataloader):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            # log the current loss values for this batch
            losses = trainer.get_meter_data()
            log.add_scalars(main_tag='Training(batch)',
                            tag_scalar_dict=losses,
                            global_step=idx)
            idx += 1

            if (ii + 1) % opt.plot_every == 0:
                # periodic console progress (visdom plotting is disabled here)
                print(trainer.get_meter_data())
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        log.add_scalar(tag='mAP', scalar_value=eval_result['map'], global_step=epoch)
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        print(log_info)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9 and best_path is not None:
            # reload the best checkpoint so far and decay the learning rate
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:
            break
Example #31
0
def eval(dataloader, faster_rcnn, trainer, dataset, test_num=10000):
    """Run validation and return VOC-style detection metrics.

    Args:
        dataloader: yields ``(imgs, sizes, gt_bboxes, gt_labels,
            gt_difficults, image_ids)`` batches (batch size 1 is assumed
            by the ``[0]`` indexing below).
        faster_rcnn: model exposing ``predict(imgs, sizes)``.
        trainer: provides ``trainer.vis`` for plotting sample detections.
        dataset: provides ``get_class_names()`` for plot labels.
        test_num: maximum number of batches to evaluate.

    Returns:
        The dict from ``eval_detection_voc`` (includes key ``'map'``).
    """
    with torch.no_grad():
        print('Running validation')
        # Each predicted box is organized as (y_min, x_min, y_max, x_max),
        # where y corresponds to the height and x to the width.
        pred_bboxes, pred_labels, pred_scores = list(), list(), list()
        gt_bboxes, gt_labels, gt_difficults = list(), list(), list()
        image_ids = list()
        for ii, (imgs, sizes, gt_bboxes_, gt_labels_, gt_difficults_,
                 image_ids_) in tqdm(enumerate(dataloader), total=test_num):
            sizes = [
                sizes[0].detach().numpy().tolist()[0],
                sizes[1].detach().numpy().tolist()[0]
            ]
            pred_bboxes_, pred_labels_, pred_scores_ = faster_rcnn.predict(
                imgs, [sizes])
            # .copy() lets the loaded image memory be released each iteration
            gt_bboxes += list(gt_bboxes_.numpy().copy())
            gt_labels += list(gt_labels_.numpy().copy())
            gt_difficults += list(gt_difficults_.numpy().copy())
            image_ids += list(image_ids_.numpy().copy())
            pred_bboxes += [pp.copy() for pp in pred_bboxes_]
            pred_labels += [pp.copy() for pp in pred_labels_]
            pred_scores += [pp.copy() for pp in pred_scores_]
            # fix: stop after exactly test_num batches (was `ii == test_num`,
            # which evaluated test_num + 1 batches and disagreed with the
            # tqdm total above)
            if ii + 1 >= test_num:
                break

        result = eval_detection_voc(pred_bboxes,
                                    pred_labels,
                                    pred_scores,
                                    gt_bboxes,
                                    gt_labels,
                                    gt_difficults,
                                    use_07_metric=True)

        if opt.validate_only:
            # dump raw detections alongside the checkpoint for offline analysis
            save_path = '{}_detections.npz'.format(opt.load_path)
            np.savez(save_path,
                     pred_bboxes=pred_bboxes,
                     pred_labels=pred_labels,
                     pred_scores=pred_scores,
                     gt_bboxes=gt_bboxes,
                     gt_labels=gt_labels,
                     gt_difficults=gt_difficults,
                     image_ids=image_ids,
                     result=result)
        else:
            # visualize the last evaluated image: ground truth vs. predictions
            # NOTE(review): assumes the dataloader yielded at least one batch;
            # `imgs` is otherwise unbound.
            ori_img_ = inverse_normalize(at.tonumpy(imgs[0]))
            gt_img = visdom_bbox(ori_img_,
                                 at.tonumpy(gt_bboxes[-1]),
                                 at.tonumpy(gt_labels[-1]),
                                 label_names=dataset.get_class_names() +
                                 ['BG'])
            trainer.vis.img('test_gt_img', gt_img)

            # plot predicted bboxes
            pred_img = visdom_bbox(ori_img_,
                                   at.tonumpy(pred_bboxes[-1]),
                                   at.tonumpy(pred_labels[-1]).reshape(-1),
                                   at.tonumpy(pred_scores[-1]),
                                   label_names=dataset.get_class_names() +
                                   ['BG'])
            trainer.vis.img('test_pred_img', pred_img)

        # drop references to the last batch and free cached GPU memory
        del imgs, gt_bboxes_, gt_labels_, gt_difficults_, image_ids_, pred_bboxes_, pred_labels_, pred_scores_
        torch.cuda.empty_cache()
        return result