Exemplo n.º 1
0
def train(**kwargs):
    """Train a Faster R-CNN (VGG16) model, then evaluate and save it once.

    Keyword arguments are handed to ``Config._parse`` before anything else
    is built. The loop runs a fixed number of epochs with batch size 1,
    plots diagnostics through ``trainer.vis`` every ``Config.plot_every``
    iterations, and scales the learning rate by ``Config.lr_decay`` after
    one fixed epoch. After the last epoch the model is evaluated on the
    test loader and the trainer is saved with the resulting mAP.

    Args:
        **kwargs: Configuration values forwarded to ``Config._parse``.
    """
    Config._parse(kwargs)

    # Schedule used by this script: 7 epochs, one LR decay after epoch
    # index 4.
    num_epochs = 7
    lr_decay_epoch = 4

    dataset = Dataset(Config)
    print('load data')
    dataloader = data_.DataLoader(
        dataset,
        batch_size=1,
        shuffle=True,
        num_workers=Config.num_workers,
    )
    testset = TestDataset(Config)
    test_dataloader = data_.DataLoader(
        testset,
        batch_size=1,
        num_workers=2,
        shuffle=False,
    )

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn, using_visdom=False).cuda()
    if Config.frc_ckpt_path:
        trainer.load(Config.frc_ckpt_path)
        print('load pretrained model from %s' % Config.frc_ckpt_path)

    # NOTE(review): the trainer is built with using_visdom=False, yet
    # trainer.vis is used below -- confirm vis is still usable in that mode.
    trainer.vis.text(dataset.db.label_names, win='labels')

    for epoch in range(num_epochs):
        trainer.reset_meters()
        # enumerate(tqdm(...)) rather than tqdm(enumerate(...)) so tqdm can
        # see the loader's length and show a total.
        for ii, (img, bbox_, label_, scale,
                 ori_img) in enumerate(tqdm(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % Config.plot_every != 0:
                continue

            # Drop into the debugger when the debug marker file exists.
            if os.path.exists(Config.debug_file):
                ipdb.set_trace()

            # Plot the loss meters.
            trainer.vis.plot_many(trainer.get_meter_data())

            # Plot ground-truth boxes on the (approximately) de-normalized
            # network input.
            ori_img_ = (img * 0.225 + 0.45).clamp(min=0, max=1) * 255
            gt_img = visdom_bbox(
                at.tonumpy(ori_img_)[0],
                at.tonumpy(bbox_)[0], label_[0].numpy())
            trainer.vis.img('gt_img', gt_img)

            # Plot predicted boxes on the original image.
            _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                ori_img, visualize=True)
            pred_img = visdom_bbox(at.tonumpy(ori_img[0]),
                                   at.tonumpy(_bboxes[0]),
                                   at.tonumpy(_labels[0]).reshape(-1),
                                   at.tonumpy(_scores[0]))
            trainer.vis.img('pred_img', pred_img)

            # RPN confusion matrix (meter), shown as text.
            trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                             win='rpn_cm')
            # RoI confusion matrix, shown as an image.
            trainer.vis.img(
                'roi_cm',
                at.totensor(trainer.roi_cm.conf, False).float())

        if epoch == lr_decay_epoch:
            trainer.faster_rcnn.scale_lr(Config.lr_decay)

    # test_num is set far beyond any dataset size, presumably so the whole
    # test set is evaluated -- confirm against eval's handling of test_num.
    eval_result = eval(test_dataloader, faster_rcnn, test_num=1e100)
    # Print the actual result, not just the label.
    print('eval_result', eval_result)
    trainer.save(mAP=eval_result['map'])
Exemplo n.º 2
0
import datetime

from utils.config import Config
from model.fasterrcnn import FasterRCNNTrainer, FasterRCNN
import tensorflow as tf
from utils.data import Dataset

# Enumerate the GPUs visible to TensorFlow; this script requires at least one.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
# NOTE: this check is an assert, so it is skipped when Python runs with -O.
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
# Turn on memory growth for the first GPU only, so TensorFlow allocates GPU
# memory as needed instead of reserving it all up front.
tf.config.experimental.set_memory_growth(physical_devices[0], True)

# Build the configuration; _parse is given an empty dict -- presumably this
# means "no overrides of the defaults" (confirm against Config._parse).
config = Config()
config._parse({})

# The printed message is Chinese for "Reading data....".
print("读取数据中....")
dataset = Dataset(config)

# NOTE(review): 21 and (7, 7) look like the number of classes and the RoI
# pooling output size -- confirm against FasterRCNN's constructor signature.
frcnn = FasterRCNN(21, (7, 7))
print('model construct completed')
"""
feature_map, rpn_locs, rpn_scores, rois, roi_indices, anchor = frcnn.rpn(x, scale)
'''
feature_map : (1, 38, 50, 256) max= 0.0578503
rpn_locs    : (1, 38, 50, 36) max= 0.058497224
rpn_scores  : (1, 17100, 2) max= 0.047915094
rois        : (2000, 4) max= 791.0
roi_indices :(2000,) max= 0
anchor      : (17100, 4) max= 1154.0387
'''
bbox = bboxes
label = labels
Exemplo n.º 3
0
def train(**kwargs):
    """Train a Faster R-CNN (VGG16) model with per-epoch evaluation.

    Keyword arguments are handed to ``Config._parse`` before anything else
    is built. Each epoch trains over the whole training loader (batch size
    1), plotting diagnostics through ``trainer.vis`` every
    ``Config.plot_every`` iterations, then evaluates on the test loader and
    logs the learning rate, mAP and meter data. Whenever the mAP improves
    on the best seen so far, the trainer is saved and the returned path is
    remembered. After epoch index 9 the best checkpoint (if any was saved)
    is reloaded and the learning rate is scaled by ``Config.lr_decay``.
    Training stops after epoch index 13 at the latest.

    Args:
        **kwargs: Configuration values forwarded to ``Config._parse``.
    """
    Config._parse(kwargs)

    dataset = Dataset(Config)
    print('load data')
    dataloader = data_.DataLoader(
        dataset,
        batch_size=1,
        shuffle=True,
        num_workers=Config.num_workers,
    )
    testset = TestDataset(Config)
    test_dataloader = data_.DataLoader(
        testset,
        batch_size=1,
        num_workers=Config.test_num_workers,
        shuffle=False,
        pin_memory=True,
    )

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if Config.frc_ckpt_path:
        trainer.load(Config.frc_ckpt_path)
        print('load pretrained model from %s' % Config.frc_ckpt_path)
    trainer.vis.text(dataset.db.label_names, win='labels')

    best_map = 0
    # Path of the best checkpoint saved so far; stays None until some
    # evaluation beats the initial best_map of 0.
    best_path = None

    for epoch in range(Config.epoch):
        trainer.reset_meters()
        # enumerate(tqdm(...)) rather than tqdm(enumerate(...)) so tqdm can
        # see the loader's length and show a total.
        for ii, (img, bbox_, label_, scale) in enumerate(tqdm(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % Config.plot_every != 0:
                continue

            # Drop into the debugger when the debug marker file exists.
            if os.path.exists(Config.debug_file):
                ipdb.set_trace()

            # Plot the loss meters.
            trainer.vis.plot_many(trainer.get_meter_data())

            # Plot ground-truth boxes on the de-normalized input image.
            ori_img_ = inverse_normalize(at.tonumpy(img[0]))
            gt_img = visdom_bbox(ori_img_, at.tonumpy(bbox_[0]),
                                 at.tonumpy(label_[0]))
            trainer.vis.img('gt_img', gt_img)

            # Plot predicted boxes on the same image.
            _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                [ori_img_], visualize=True)
            pred_img = visdom_bbox(ori_img_, at.tonumpy(_bboxes[0]),
                                   at.tonumpy(_labels[0]).reshape(-1),
                                   at.tonumpy(_scores[0]))
            trainer.vis.img('pred_img', pred_img)

            # RPN confusion matrix (meter), shown as text.
            trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                             win='rpn_cm')
            # RoI confusion matrix, shown as an image.
            trainer.vis.img(
                'roi_cm',
                at.totensor(trainer.roi_cm.conf, False).float())

        eval_result = eval(test_dataloader,
                           faster_rcnn,
                           test_num=Config.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        # Read the learning rate actually in use from the optimizer.
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)

        if epoch == 9:
            # Only reload when a checkpoint was actually saved; previously
            # this raised NameError if no epoch had improved on best_map.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(Config.lr_decay)

        if epoch == 13:
            break