示例#1
0
def load_weight():
    sizes_list = myDect_config.sizes_list
    num_class = myDect_config.num_class
    ratios_list = myDect_config.ratios_list
    ctx = myDect_config.ctx
    net = SSD(num_class, sizes_list, ratios_list, ctx, prefix='ssd_')
    # net.load_params('./Model/mobilenet1.0_papercupDetect.param',ctx=ctx)
    # net.load_params('./results/mobilenet1.0/2/mobilenet1_papercupDetect.param',ctx=ctx)
    net.load_params('./Model/resnet18_papercupDetect.param', ctx=ctx)
    # net.load_params('./Model/vgg11bn29_512x512_data_sizes.param')
    return net
示例#2
0
def run_train(dataset, num_epochs=1):
    start_time = time.perf_counter()

    #model = VGGBase()
    model = SSD(n_classes=20)
    tf.print('prios_cxcy->', model.priors_cxcy)
    criterion = MultiBoxLoss(priors_cxcy=model.priors_cxcy)

    for _ in tf.data.Dataset.range(num_epochs):
        for idx, (
                images, boxes,
                labels) in enumerate(dataset):  # (batch_size (N), 300, 300, 3)

            images = np.array(images)
            labels = np.array(labels)
            boxes = np.array(list(boxes))

            if isprint:
                tf.print(type(images), type(labels), images.shape,
                         labels.shape)
            predicted_locs, predicted_socres = model(
                images)  # (N, 8732, 4), (N, 8732, n_classes)

            #if isprint:
            #        tf.print("============================================================")
            #        tf.print('predicted_locs->',predicted_locs.shape)
            ##        tf.print('predicted_socres->',predicted_socres.shape)
            #       tf.print('image ->',images.shape)
            #       tf.print('boxes->',boxes)
            #       tf.print('labels->',labels.shape)
            #       tf.print('labels->',labels)
            #find_jaccard_overlap  model.forward
            loss = criterion(predicted_locs, predicted_socres, boxes, labels)
            pass
            if idx == 0: break
        pass
    tf.print("실행 시간:", time.perf_counter() - start_time)
示例#3
0
# coding:utf-8
from __future__ import print_function
import torch
import torchvision.transforms as tfs
import torch.nn.functional as F

from model import SSD
from data import PriorBox
from config import opt

from PIL import Image, ImageDraw

net = SSD(opt)
net.load_state_dict(torch.load(opt.ckpt_path)['net'])
net.eval()

# 加载测试图片
img = Image.open('/home/j/MYSSD/pytorch-ssd-master/image/img1.jpg')
img1 = img.resize((300, 300))
transform = tfs.Compose([
    tfs.ToTensor(),
    tfs.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
])

img1 = transform(img1)

# 前向传播
loc, conf = net(img1[None, :, :, :])

# 将数据转换格式
prior_box = PriorBox(opt)
示例#4
0
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_set = VOC_loader(root="D:\Data\\voc\\2007",
                      year='2007_test',
                      image_set='train',
                      phase='TEST',
                      transform=transform)

# 3.test loader
test_loader = data.DataLoader(dataset=test_set,
                              batch_size=1,
                              collate_fn=test_set.collate_fn)

# 4. model load
net = SSD().to(device)
net.load_state_dict(torch.load('./saves/ssd.1.pth.tar'))
net.eval()

# 5. test
with torch.no_grad():

    for image, target in test_loader:

        # 사실 target 은 필요 없둠
        # print(image.size())  # image : torch.Size([1, 3, 300, 300])
        # print(len(target))  # target : list ( torch.Size([object_n, 5]) ) len(list) = 1
        # image

        # 각각의 tensor 들을 gpu 에 올리는 부분
        image = image.to(device)
示例#5
0
                                                  test_size=0.3,
                                                  random_state=2021)

train_dataset = TextDetectionDataset(X_train, y_train)
val_dataset = TextDetectionDataset(X_val, y_val)

train_dataloader = DataLoader(train_dataset,
                              batch_size=BATCH_SIZE,
                              collate_fn=collate_fn)
val_dataset = DataLoader(val_dataset,
                         batch_size=BATCH_SIZE,
                         collate_fn=collate_fn)

# 3.Model
N_EPOCHS = 10
model = SSD()
loss_fn = MultiBoxLoss()
optimizer = Adam(model.parameters())

train_losses = []
for epoch in range(N_EPOCHS):
    for X_batch, y_batch in tqdm(train_dataloader):
        locs, cls_scores = model(X_batch)
        loss = loss_fn(locs, cls_scores, y_batch)
        train_losses.append(loss.item())
        print('\nTrain batch loss: ', loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
示例#6
0
print('')
print('Setting:', opt)
print('')


#random_seed
random.seed(opt.seed)
np.random.seed(opt.seed)
torch.manual_seed(opt.seed)
torch.cuda.manual_seed_all(opt.seed)
PRNG = RandomState(opt.seed)


# model

model = SSD(opt.n_classes)
cfg = model.config
model.init_parameters(opt.pretrainedvgg)
criterion = MultiBoxLoss()
model.cuda()
criterion.cuda()
cudnn.benchmark = True
#print(cfg)
#print('')


#dataload
dataset = data.loader(cfg, opt.augmentation, opt.data_path ,PRNG)
print('size of dataset:', len(dataset))

# optimizer
示例#7
0
'''Convert pretrained VGG model to SSD.

VGG model download from PyTorch model zoo: https://download.pytorch.org/models/vgg16-397923af.pth
'''
import torch
from torchvision.models import vgg16
from model import SSD
from config import opt

vgg = torch.load('../checkpoints/vgg16-397923af.pth')

ssd = SSD(opt)
layer_indices = [0, 2, 5, 7, 10, 12, 14, 17, 19,
                 21]  # 这是因为只载入卷及层的权重,Relu, Maxpooing那些层是没有参数的

for layer_idx in layer_indices:
    ssd.base[layer_idx].weight.data = vgg['features.%d.weight' % layer_idx]
    ssd.base[layer_idx].bias.data = vgg['features.%d.bias' % layer_idx]

# [24,26,28]
ssd.conv5_1.weight.data = vgg['features.24.weight']
ssd.conv5_1.bias.data = vgg['features.24.bias']
ssd.conv5_2.weight.data = vgg['features.26.weight']
ssd.conv5_2.bias.data = vgg['features.26.bias']
ssd.conv5_3.weight.data = vgg['features.28.weight']
ssd.conv5_3.bias.data = vgg['features.28.bias']

torch.save(ssd.state_dict(), 'ssd.pth')  # state_dict() 仅保存和加载模型参数(推荐使用)
示例#8
0
def train():
    ssd_cfg = {
        'num_classes': 21,  # 背景クラスを含めた合計クラス数
        'input_size': 300,  # 画像の入力サイズ
        'bbox_aspect_num': [4, 6, 6, 6, 4, 4],  # 出力するDBoxのアスペクト比の種類
        'feature_maps': [38, 19, 10, 5, 3, 1],  # 各sourceの画像サイズ
        'pix_sizes': [8, 16, 32, 64, 100, 300],  # DBOXの大きさを決める
        'min_sizes': [30, 60, 111, 162, 213, 264],  # DBOXの大きさを決める
        'max_sizes': [60, 111, 162, 213, 264, 315],  # DBOXの大きさを決める
        'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
    }
    
    # SSDネットワークモデル
    net = SSD(phase="train", cfg=ssd_cfg)

    root_dir  = "./data/VOCdevkit/VOC2012/"
    train_dataset, val_dataset = load_data(root_dir)
    
    # SSDの初期の重みを設定
    # ssdのvgg部分に重みをロードする
    #vgg_weights = torch.load('./weights/vgg16_reducedfc.pth')
    #net.vgg.load_state_dict(vgg_weights)
    
    # ssdのその他のネットワークの重みはHeの初期値で初期化
    
    
    def weights_init(m):
        if isinstance(m, nn.Conv2d):
            init.kaiming_normal_(m.weight.data)
            if m.bias is not None:  # バイアス項がある場合
                nn.init.constant_(m.bias, 0.0)
    
    
    # Heの初期値を適用
    net.extras.apply(weights_init)
    net.loc.apply(weights_init)
    net.conf.apply(weights_init)

    # 損失関数の設定
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    criterion = MultiBoxLoss(jaccard_thresh=0.5, neg_pos=3, device=device)
    
    # 最適化手法の設定
    optimizer = optim.SGD(net.parameters(), lr=1e-3,
                          momentum=0.9, weight_decay=5e-4)
    num_epochs = 10

    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32,
                                         shuffle=False, num_workers=1, collate_fn=od_collate_fn)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32,
                                         shuffle=False, num_workers=1, collate_fn=od_collate_fn)

    print("使用デバイス:", device)

    # ネットワークをGPUへ
    net.to(device)

    # ネットワークがある程度固定であれば、高速化させる
    torch.backends.cudnn.benchmark = True

    # イテレーションカウンタをセット
    iteration = 1
    epoch_train_loss = 0.0  # epochの損失和
    epoch_val_loss = 0.0  # epochの損失和
    logs = []

    # epochのループ
    for epoch in range(num_epochs+1):

        # 開始時刻を保存
        t_epoch_start = time.time()
        t_iter_start = time.time()

        print('-------------')
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-------------')

        # epochごとの訓練と検証のループ
        for phase in ['train', 'val']:
            if phase == 'train':
                net.train()  # モデルを訓練モードに
                print('(train)')
            else:
                if((epoch+1) % 10 == 0):
                    net.eval()   # モデルを検証モードに
                    print('-------------')
                    print('(val)')
                else:
                    # 検証は10回に1回だけ行う
                    continue

            # データローダーからminibatchずつ取り出すループ
            for images, targets in train_loader:

                # GPUが使えるならGPUにデータを送る
                images = images.to(device)
                targets = [ann.to(device)
                           for ann in targets]  # リストの各要素のテンソルをGPUへ

                # optimizerを初期化
                optimizer.zero_grad()

                iteration += 1
                # 順伝搬(forward)計算
                with torch.set_grad_enabled(phase == 'train'):
                    # 順伝搬(forward)計算
                    outputs = net(images)

                    # 損失の計算
                    loss_l, loss_c = criterion(outputs, targets)
                    loss = loss_l + loss_c

                    if phase == 'train':
                        loss.backward()  # 勾配の計算

                        # 勾配が大きくなりすぎると計算が不安定になるので、clipで最大でも勾配2.0に留める
                        nn.utils.clip_grad_value_(
                            net.parameters(), clip_value=2.0)

                        optimizer.step()  # パラメータ更新

                        if (iteration % 100 == 0):  # 100iterに1度、lossを表示
                            t_iter_finish = time.time()
                            duration = t_iter_finish - t_iter_start
                            print('イテレーション {} || Loss: {:.4f} || 10iter: {:.4f} sec.'.format(
                                iteration, loss.item(), duration))
                            t_iter_start = time.time()

                        epoch_train_loss += loss.item()

            for images, targets in val_loader:
                with torch.set_grad_enabled(phase == 'valid'):
                    # 順伝搬(forward)計算
                    outputs = net(images)

                # GPUが使えるならGPUにデータを送る
                images = images.to(device)
                targets = [ann.to(device)
                           for ann in targets]  # リストの各要素のテンソルをGPUへ
示例#9
0
'''Convert pretrained VGG model to SSD.

VGG model download from PyTorch model zoo: https://download.pytorch.org/models/vgg16-397923af.pth
'''
import torch

from model import SSD
from config import opt

vgg = torch.load('../checkpoints/vgg16-397923af.pth')

ssd = SSD(opt)
layer_indices = [0, 2, 5, 7, 10, 12, 14, 17, 19, 21]

for layer_idx in layer_indices:
    ssd.base[layer_idx].weight.data = vgg['features.%d.weight' % layer_idx]
    ssd.base[layer_idx].bias.data = vgg['features.%d.bias' % layer_idx]

# [24,26,28]
ssd.conv5_1.weight.data = vgg['features.24.weight']
ssd.conv5_1.bias.data = vgg['features.24.bias']
ssd.conv5_2.weight.data = vgg['features.26.weight']
ssd.conv5_2.bias.data = vgg['features.26.bias']
ssd.conv5_3.weight.data = vgg['features.28.weight']
ssd.conv5_3.bias.data = vgg['features.28.bias']

torch.save(ssd.state_dict(), 'ssd.pth')
示例#10
0
# 构建trainset trainloader testset testloader
trainset = ImageSet(opt, transform, is_train=True)
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=opt.batch_size,
                                          shuffle=True,
                                          num_workers=8)

testset = ImageSet(opt, transform, is_train=False)
testloader = torch.utils.data.DataLoader(testset,
                                         batch_size=opt.batch_size,
                                         shuffle=False,
                                         num_workers=8)
print('## Data preparation finish ##')

print('## Building net : SSD300 ##')
net = SSD(opt)
# 是否加载之前保存的模型
if args.resume:
    print(' # Resuming from checkpoint # ')
    checkpoint = torch.load(opt.ckpt_path)
    net.load_state_dict(checkpoint['net'])
    best_loss = checkpoint['loss']
    start_epoch = checkpoint['epoch']
# 加载预训练的模型
else:
    print(' # Loading pretrained model # ')
    net.load_state_dict(torch.load(opt.pretrained_model))

criterion = MultiBoxLoss()

if use_cuda:
示例#11
0
                batch_num += 1

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    predicted_locs, predicted_scores, _ = model(augmented_lidar_cam_coords)
                    loss = criterion(predicted_locs, predicted_scores, boxes, classes)
                    
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                        
                running_loss += loss.item() * bat_size
    return model #, val_acc_history
    
device = torch.device("cuda:0")
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
ssd = SSD(resnet_type=34, n_classes=2).to(device)
trainset = KittiDataset(root="/Users/xymbiotec/Desktop/painting-master/work", mode="training", valid=False)
valset = KittiDataset(root="/Users/xymbiotec/Desktop/painting-master/work", mode="training", valid=True)

datasets = {'train': trainset, 'val': valset}
dataloaders_dict = {x: DataLoader(datasets[x], batch_size=4, shuffle=True, collate_fn=datasets[x].collate_fn, num_workers=0, drop_last=True) for x in ['train', 'val']}

optimizer_ft = torch.optim.SGD(ssd.parameters(), lr=0.0001, momentum=0.9)
criterion = MultiBoxLoss(priors_cxcy=ssd.priors_cxcy).to(device)

ssd = train_model(ssd, dataloaders_dict, criterion, optimizer_ft, num_epochs=10)
torch.save(ssd.state_dict(), './pointpillars.pth')
示例#12
0
class DEC_Module(object):
    def __init__(self, multigpu, resume):
        self.model = SSD(num_classes=cfg.num_classes,
                         num_blocks=cfg.mbox,
                         top_k=cfg.top_k,
                         conf_thresh=cfg.conf_thresh,
                         nms_thresh=cfg.nms_thresh,
                         variance=cfg.variance)
        if resume is not None:
            print('Resuming training weights from {} ...'.format(resume))
            resume_dict = torch.load(resume)
            resume_dict_update = {}
            for k in resume_dict:
                if k.startswith('module') and not k.startswith('module_list'):
                    resume_dict_update[k[7:]] = resume_dict[k]
                else:
                    resume_dict_update[k] = resume_dict[k]
            self.model.load_state_dict(resume_dict_update)
        else:
            resnet = "resnet101"
            print('Resuming weights from {} ...'.format(resnet))
            pre_trained_dict = model_zoo.load_url(model_urls[resnet])
            model_dict = self.model.state_dict()
            updated_dict = {
                k: v
                for k, v in pre_trained_dict.items() if k in model_dict
            }
            model_dict.update(updated_dict)
            self.model.load_state_dict(model_dict)

        self.multigpu = multigpu

    def train(self, vis=False):
        print("begin training....")

        if not os.path.exists('weights'):
            os.mkdir('weights')

        # Device settings
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        device1 = torch.device(
            "cuda:1" if torch.cuda.is_available() else "cpu")

        if self.multigpu:
            if torch.cuda.device_count() > 1:
                print("Let's use", torch.cuda.device_count(), "GPUs!")
                self.model = nn.DataParallel(self.model)
        self.model = self.model.to(device)

        eval_model = SSD(num_classes=cfg.num_classes,
                         num_blocks=cfg.mbox,
                         top_k=cfg.top_k,
                         conf_thresh=cfg.conf_thresh,
                         nms_thresh=cfg.nms_thresh,
                         variance=cfg.variance)
        eval_model = eval_model.to(device1)

        for item in self.model.parameters():
            print(item.requires_grad)

        total_epoch = cfg.epoch

        criterion = DEC_loss(num_classes=cfg.num_classes,
                             variances=cfg.variance,
                             device=device)

        optimizer = optim.SGD(params=filter(lambda p: p.requires_grad,
                                            self.model.parameters()),
                              lr=cfg.init_lr,
                              momentum=0.9,
                              weight_decay=cfg.weight_decay)

        # scheduler = lr_scheduler.StepLR(optimizer, step_size=cfg.lr_decay_epoch, gamma=0.1)
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             milestones=cfg.milestones,
                                             gamma=0.1)

        print('Loading Datasets...')
        dsets = PASCALVOC(root=cfg.root,
                          image_sets=cfg.train_sets,
                          transform=transforms.DEC_transforms(
                              phase='train',
                              size=cfg.img_size,
                              mean=cfg.means,
                              std=cfg.std))

        dsets_val = PASCALVOC(root=cfg.root,
                              image_sets=cfg.test_sets,
                              transform=transforms.DEC_transforms(
                                  phase='val',
                                  size=cfg.img_size,
                                  mean=cfg.means,
                                  std=cfg.std))

        dset_loaders = torch.utils.data.DataLoader(
            dsets,
            cfg.batch_size,
            num_workers=4,
            shuffle=True,
            collate_fn=detection_collate,
            pin_memory=True)
        if vis:
            viewDatasets_DEC(dset_loaders)

        train_loss_dict = []
        mAP_dict = []
        for epoch in range(total_epoch):
            print('Epoch {}/{}'.format(epoch, total_epoch - 1))
            print('-' * 10)
            for phase in ['train', 'val']:
                if phase == 'train':
                    scheduler.step()
                    self.model.train()
                    running_loss = 0.0
                    for data in dset_loaders:
                        inputs, target = data
                        inputs = inputs.to(device)
                        target = [item.to(device) for item in target]

                        optimizer.zero_grad()

                        # forward
                        # track history if only in train
                        with torch.set_grad_enabled(phase == 'train'):
                            outputs = self.model(inputs, phase)
                            # backprop
                            loss_l, loss_c = criterion(outputs, target)
                            loss = loss_l + loss_c

                            loss.backward()
                            optimizer.step()

                        running_loss += loss.item()

                    epoch_loss = running_loss / len(dsets)
                    print('{} Loss: {:.6}'.format(epoch, epoch_loss))

                    train_loss_dict.append(epoch_loss)
                    np.savetxt('train_loss.txt', train_loss_dict, fmt='%.6f')
                    if epoch % 5 == 0:
                        torch.save(
                            self.model.state_dict(),
                            os.path.join(
                                'weights', '{:d}_{:.4f}_model.pth'.format(
                                    epoch, epoch_loss)))
                    torch.save(self.model.state_dict(),
                               os.path.join('weights', 'end_model.pth'))

                else:
                    if epoch % 5 == 0:
                        model_dict = self.model.state_dict()
                        val_dict = {k[7:]: v for k, v in model_dict.items()}
                        eval_model.load_state_dict(val_dict)
                        maps = self.eval(device1, eval_model, dsets_val)
                        mAP_dict.append(maps)
                        np.savetxt('mAP.txt', mAP_dict, fmt='%.6f')

    def test(self):
        print('testing, evaluation mode...')
        self.model.eval()

        print('loading data...')
        dsets = PASCALVOC(root=cfg.root,
                          image_sets=cfg.test_sets,
                          transform=transforms.DEC_transforms(
                              phase='val',
                              size=cfg.img_size,
                              mean=cfg.means,
                              std=cfg.std))

        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.model = self.model.to(device)

        num_imgs = len(dsets)
        test_timer = Timer()
        cv2.namedWindow('img')
        for i in range(num_imgs):
            print('testing {}...'.format(dsets.img_ids[i]))
            img, target = dsets.__getitem__(i)
            ori_img = cv2.imread(dsets._imgpath % dsets.img_ids[i])
            h, w, c = ori_img.shape

            x = img.unsqueeze(0)
            x = x.to(device)

            test_timer.tic()
            detections = self.model(x, 'test')
            detect_time = test_timer.toc(average=False)
            print('test time: {}'.format(detect_time))
            for j in range(1, detections.size(1)):
                dets = detections[0, j, :]
                mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()
                dets = torch.masked_select(dets, mask).view(-1, 5)
                if dets.shape[0] == 0:
                    continue
                if j:
                    boxes = dets[:, 1:]
                    boxes[:, 0] *= h
                    boxes[:, 1] *= w
                    boxes[:, 2] *= h
                    boxes[:, 3] *= w
                    scores = dets[:, 0].cpu().numpy()
                    for box, score in zip(boxes, scores):
                        y1, x1, y2, x2 = box
                        y1 = int(y1)
                        x1 = int(x1)
                        y2 = int(y2)
                        x2 = int(x2)

                        cv2.rectangle(ori_img, (x1, y1), (x2, y2), (0, 255, 0),
                                      2, 2)
                        cv2.putText(ori_img,
                                    cfg.VOC_CLASSES[int(j)] + "%.2f" % score,
                                    (x1, y1 + 20), cv2.FONT_HERSHEY_SIMPLEX,
                                    0.6, (255, 0, 255))
            cv2.imshow('img', ori_img)
            k = cv2.waitKey(0)
            if k & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                exit()
        cv2.destroyAllWindows()
        exit()

    def eval(self, device, eval_model, dsets):
        # print('evaluation mode...')
        # self.model.eval()
        eval_model.eval()
        output_dir = cfg.output_dir
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        else:
            shutil.rmtree(output_dir)
            os.mkdir(output_dir)

        num_imgs = len(dsets)
        total_time = 0

        det_file = os.path.join(output_dir, 'detections.pkl')
        # print('Detecting bounding boxes...')
        all_boxes = [[[] for _ in range(num_imgs)]
                     for _ in range(cfg.num_classes)]

        _t = {'im_detect': Timer(), 'misc': Timer()}

        for i in range(num_imgs):
            img, target = dsets.__getitem__(i)
            ori_img = cv2.imread(dsets._imgpath % dsets.img_ids[i])
            h, w, c = ori_img.shape

            x = img.unsqueeze(0)
            x = x.to(device)

            _t['im_detect'].tic()
            detections = eval_model(x, 'test')
            detect_time = _t['im_detect'].toc(average=False)

            # ignore the background boxes
            for j in range(1, detections.size(1)):
                dets = detections[0, j, :]
                mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()
                dets = torch.masked_select(dets, mask).view(-1, 5)
                if dets.shape[0] == 0:
                    continue
                boxes = dets[:, 1:]
                boxes[:, 0] *= h
                boxes[:, 1] *= w
                boxes[:, 2] *= h
                boxes[:, 3] *= w
                scores = dets[:, 0].cpu().numpy()
                cls_dets = np.hstack((boxes.cpu().numpy(), scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)
                all_boxes[j][i] = cls_dets

            # print('img-detect: {:d}/{:d} {:.3f}s'.format(i + 1, num_imgs, detect_time))
            total_time += detect_time

        with open(det_file, 'wb') as f:
            pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
            f.close()

        print('Saving the results...')
        for cls_ind, cls in enumerate(cfg.labelmap):
            # print('Writing {:s} VOC results file'.format(cls))
            filename = dec_eval.get_voc_results_file_template('test', cls)
            with open(filename, 'wt') as f:
                for im_ind, index in enumerate(dsets.img_ids):
                    dets = all_boxes[cls_ind + 1][im_ind]
                    if dets == []:
                        continue
                    # the VOCdevkit expects 1-based indices
                    for k in range(dets.shape[0]):
                        f.write(
                            '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.format(
                                index[1], dets[k, -1], dets[k, 0] + 1,
                                dets[k, 1] + 1, dets[k, 2] + 1,
                                dets[k, 3] + 1))

        # print('Evaluating detections....')
        print('average time is {}'.format(float(total_time) / num_imgs))
        maps = dec_eval.do_python_eval(output_dir=output_dir, use_07=True)
        return maps

    def eval_single(self):
        print('evaluation mode...')
        self.model.eval()

        print('loading data...')
        dsets = PASCALVOC(root=cfg.root,
                          image_sets=cfg.test_sets,
                          transform=transforms.DEC_transforms(
                              phase='val',
                              size=cfg.img_size,
                              mean=cfg.means,
                              std=cfg.std))

        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.model = self.model.to(device)

        output_dir = cfg.output_dir
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        else:
            shutil.rmtree(output_dir)
            os.mkdir(output_dir)

        num_imgs = len(dsets)

        det_file = os.path.join(output_dir, 'detections.pkl')
        print('Detecting bounding boxes...')
        all_boxes = [[[] for _ in range(num_imgs)]
                     for _ in range(cfg.num_classes)]

        _t = {'im_detect': Timer(), 'misc': Timer()}
        total_time = 0
        for i in range(num_imgs):
            img, target = dsets.__getitem__(i)
            ori_img = cv2.imread(dsets._imgpath % dsets.img_ids[i])
            h, w, c = ori_img.shape

            x = img.unsqueeze(0)
            x = x.to(device)

            _t['im_detect'].tic()
            detections = self.model(x, 'test')
            detect_time = _t['im_detect'].toc(average=False)
            total_time += detect_time
            # ignore the background boxes
            for j in range(1, detections.size(1)):
                dets = detections[0, j, :]
                mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()
                dets = torch.masked_select(dets, mask).view(-1, 5)
                if dets.shape[0] == 0:
                    continue
                boxes = dets[:, 1:]
                boxes[:, 0] *= h
                boxes[:, 1] *= w
                boxes[:, 2] *= h
                boxes[:, 3] *= w
                boxes[:, 0] = np.maximum(0., boxes[:, 0])
                boxes[:, 1] = np.maximum(0., boxes[:, 1])
                boxes[:, 2] = np.minimum(h, boxes[:, 2])
                boxes[:, 3] = np.minimum(w, boxes[:, 3])
                scores = dets[:, 0].cpu().numpy()
                cls_dets = np.hstack((boxes.cpu().numpy(), scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)
                all_boxes[j][i] = cls_dets

            print('img-detect: {:d}/{:d} {:.3f}s'.format(
                i + 1, num_imgs, detect_time))

        with open(det_file, 'wb') as f:
            pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
            f.close()
        print('average time is {}'.format(float(total_time) / num_imgs))
        print('Saving the results...')
        for cls_ind, cls in enumerate(cfg.labelmap):
            print('Writing {:s} VOC results file'.format(cls))
            filename = dec_eval.get_voc_results_file_template('test', cls)
            with open(filename, 'wt') as f:
                for im_ind, index in enumerate(dsets.img_ids):
                    dets = all_boxes[cls_ind + 1][im_ind]
                    if dets == []:
                        continue
                    # the VOCdevkit expects 1-based indices
                    for k in range(dets.shape[0]):
                        f.write(
                            '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.format(
                                index[1], dets[k, -1], dets[k, 0] + 1,
                                dets[k, 1] + 1, dets[k, 2] + 1,
                                dets[k, 3] + 1))

        print('Evaluating detections....')

        dec_eval.do_python_eval(output_dir=output_dir, use_07=True)
示例#13
0
from torch.autograd import Variable

from evaluation import eval_voc_detection
from model import SSD
import data

#setting
parser = argparse.ArgumentParser(description='Single Shot Multi Detector')
opts.setting(parser)
opt = parser.parse_args()

#random_seed
PRNG = RandomState(opt.seed)

# model
model = SSD(opt.n_classes)
cfg = model.config
decoder = MakeBox(cfg)
model.load_state_dict(torch.load(opt.weight_path))
model.cuda()
cudnn.benchmark = True

#dataload
dataset = data.loader_test(cfg, opt.data_path, PRNG)
print('size of dataset:', len(dataset))


def test():
    print('testing....')
    pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels = [], [], [], [], []
    for i in range(len(dataset)):
示例#14
0
    print('{:d} roidb entries'.format(len(roidb)))

    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)

    load_name = os.path.join(
        input_dir, args.frame + '_{}_{}_{}_{}.pth'.format(
            args.checksession, args.checkepoch, args.checkpoint, args.GPU))

    # initilize the network here.
    if args.frame == 'ssd':
        if args.net == 'vgg16':
            Network = SSD.vgg16(imdb.classes)
        else:
            print("network is not defined")
            pdb.set_trace()
    elif args.frame == 'ssd_vmrn':
        if args.net == 'vgg16':
            Network = SSD_VMRN.vgg16(imdb.classes)
        elif args.net == 'res50':
            Network = SSD_VMRN.resnet(imdb.classes, layer_num=50)
        elif args.net == 'res101':
            Network = SSD_VMRN.resnet(imdb.classes, layer_num=101)
        else:
            print("network is not defined")
            pdb.set_trace()
    if args.frame == 'fpn':
        if args.net == 'res101':
示例#15
0
def predict_data(json_dir, path_data_dict, model_path_list, img_size,
                 model_type, quantized):
    """
    Основной цикл программы.

    1. Создается тестовый датасет
    2. Берется модель из списка
    3. Модель прогонятеся по тестовому датасету
    4. Результаты записываются в .jsonы
    5. цикл повторяется с 2ого по 4ый пункт пока не прогонятся все модели.
    """

    print("Загружаю датасет")
    test_datasets = create_dataset(path_data_dict, img_size)
    print("Датасет загружен")
    print("=" * 80)
    print(f"количество tp изображений: {test_datasets['tp'][0].shape[0]}")
    print(f"количество tp изображений: {test_datasets['bg'][0].shape[0]}")
    os.mkdir(json_dir)
    for model_path in model_path_list:
        model_name = model_path.split('\\')[-1]
        print(f"\nИмя модели: {model_name}")
        print('Прогон тестового набора...')
        json_model_dir_path = os.path.join(json_dir, model_name.split('.')[0])
        os.mkdir(json_model_dir_path)
        if model_type == '2head':
            if quantized == '+':
                q = True
            else:
                q = False

            model = Model2head(model_path, q=q)
        else:
            model = SSD(model_path, img_size)
        for key, dataset in test_datasets.items():
            json_save_path = os.path.join(json_model_dir_path, key)
            os.mkdir(json_save_path)
            json_writer = JsonWriter(json_save_path)
            x_test = dataset[0]
            y_test = dataset[1]
            img_names = dataset[2]
            for i in range(x_test.shape[0]):
                img_array = x_test[i]
                gt_true = y_test[i]
                img_name = img_names[i]
                if model_type == '2head':
                    boxes, cls_predictions = model.test(img_array)
                    json_writer.write(
                        key,
                        model_name,
                        img_name,
                        boxes,
                        cls_predictions,
                        gt_true.numpy().tolist(),
                    )
                else:
                    boxes, cls_predictions = model.test(img_array)
                    json_writer.write(
                        key,
                        model_name,
                        img_name,
                        boxes.numpy().tolist(),
                        cls_predictions.numpy().tolist(),
                        gt_true.numpy().tolist(),
                    )
示例#16
0
  # load data
  print("\n==> load data ...\n")
  _, trainloader, _, testloader = load_data(args.batch_size, args.dataset)

  # create model
  print("\n==> prepare model ...\n")
  if args.resume:
    checkpoint = torch.load('checkpoint/train/ckpt.t7')
    net = checkpoint['net']
    best_loss = checkpoint['loss']
    start_epoch = checkpoint['epoch'] + 1
    start_time = checkpoint['start_time']
    print("start_epoch:", start_epoch)
    print("start_time:", start_time)
  else:
    net = SSD(num_classes=config[args.dataset]['num_classes']+1)  # 1 for background    
  net.cuda()
  cudnn.benchmark = True
  
  # train
  print("\n==> train ...\n")
  for i in range(start_epoch, start_epoch+100):
    train(net, args.lr, trainloader, i)
    loss = test(net, testloader, i)
    if i % 2 == 1:
      args.lr *= 0.9
      print("learning rate =", args.lr)
    if loss < best_loss:
      best_loss = loss
      print('\n==> saving ...\n')
      state = {
示例#17
0
	"train": train_dataloader,
	"val": val_dataloader
}

# network
cfg = {
	"num_classes": 21, # VOC data
	"input_size": 300, # SSD300
	"bbox_aspect_num": [4, 6, 6, 6, 4, 4], 
	"feature_maps": [38, 19, 10, 5, 3, 1],
	"steps": [8, 16, 32, 64, 100, 300],
	"min_size": [30, 60, 111, 162, 213, 264],
	"max_size": [60, 111, 162, 213, 264, 315],
	"aspect_ratios": [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
}
net = SSD(phase="train", cfg=cfg)
vgg_weights = torch.load("./data/weights/vgg16_reducedfc.pth")
net.vgg.load_state_dict(vgg_weights)

def weights_init(m):
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight.data)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.0)

# He init
net.extras.apply(weights_init)
net.loc.apply(weights_init)
net.conf.apply(weights_init)

# MultiBoxLoss
示例#18
0
    def train(self, vis=False):
        print("begin training....")

        if not os.path.exists('weights'):
            os.mkdir('weights')

        # Device settings
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        device1 = torch.device(
            "cuda:1" if torch.cuda.is_available() else "cpu")

        if self.multigpu:
            if torch.cuda.device_count() > 1:
                print("Let's use", torch.cuda.device_count(), "GPUs!")
                self.model = nn.DataParallel(self.model)
        self.model = self.model.to(device)

        eval_model = SSD(num_classes=cfg.num_classes,
                         num_blocks=cfg.mbox,
                         top_k=cfg.top_k,
                         conf_thresh=cfg.conf_thresh,
                         nms_thresh=cfg.nms_thresh,
                         variance=cfg.variance)
        eval_model = eval_model.to(device1)

        for item in self.model.parameters():
            print(item.requires_grad)

        total_epoch = cfg.epoch

        criterion = DEC_loss(num_classes=cfg.num_classes,
                             variances=cfg.variance,
                             device=device)

        optimizer = optim.SGD(params=filter(lambda p: p.requires_grad,
                                            self.model.parameters()),
                              lr=cfg.init_lr,
                              momentum=0.9,
                              weight_decay=cfg.weight_decay)

        # scheduler = lr_scheduler.StepLR(optimizer, step_size=cfg.lr_decay_epoch, gamma=0.1)
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             milestones=cfg.milestones,
                                             gamma=0.1)

        print('Loading Datasets...')
        dsets = PASCALVOC(root=cfg.root,
                          image_sets=cfg.train_sets,
                          transform=transforms.DEC_transforms(
                              phase='train',
                              size=cfg.img_size,
                              mean=cfg.means,
                              std=cfg.std))

        dsets_val = PASCALVOC(root=cfg.root,
                              image_sets=cfg.test_sets,
                              transform=transforms.DEC_transforms(
                                  phase='val',
                                  size=cfg.img_size,
                                  mean=cfg.means,
                                  std=cfg.std))

        dset_loaders = torch.utils.data.DataLoader(
            dsets,
            cfg.batch_size,
            num_workers=4,
            shuffle=True,
            collate_fn=detection_collate,
            pin_memory=True)
        if vis:
            viewDatasets_DEC(dset_loaders)

        train_loss_dict = []
        mAP_dict = []
        for epoch in range(total_epoch):
            print('Epoch {}/{}'.format(epoch, total_epoch - 1))
            print('-' * 10)
            for phase in ['train', 'val']:
                if phase == 'train':
                    scheduler.step()
                    self.model.train()
                    running_loss = 0.0
                    for data in dset_loaders:
                        inputs, target = data
                        inputs = inputs.to(device)
                        target = [item.to(device) for item in target]

                        optimizer.zero_grad()

                        # forward
                        # track history if only in train
                        with torch.set_grad_enabled(phase == 'train'):
                            outputs = self.model(inputs, phase)
                            # backprop
                            loss_l, loss_c = criterion(outputs, target)
                            loss = loss_l + loss_c

                            loss.backward()
                            optimizer.step()

                        running_loss += loss.item()

                    epoch_loss = running_loss / len(dsets)
                    print('{} Loss: {:.6}'.format(epoch, epoch_loss))

                    train_loss_dict.append(epoch_loss)
                    np.savetxt('train_loss.txt', train_loss_dict, fmt='%.6f')
                    if epoch % 5 == 0:
                        torch.save(
                            self.model.state_dict(),
                            os.path.join(
                                'weights', '{:d}_{:.4f}_model.pth'.format(
                                    epoch, epoch_loss)))
                    torch.save(self.model.state_dict(),
                               os.path.join('weights', 'end_model.pth'))

                else:
                    if epoch % 5 == 0:
                        model_dict = self.model.state_dict()
                        val_dict = {k[7:]: v for k, v in model_dict.items()}
                        eval_model.load_state_dict(val_dict)
                        maps = self.eval(device1, eval_model, dsets_val)
                        mAP_dict.append(maps)
                        np.savetxt('mAP.txt', mAP_dict, fmt='%.6f')
示例#19
0
    "pottedplant", "sheep", "sofa", "train", "tvmonitor"
]

cfg = {
    "num_classes": 21,  #VOC data include 20 class + 1 background class
    "input_size": 300,  #SSD300
    "bbox_aspect_num": [4, 6, 6, 6, 4,
                        4],  # Tỷ lệ khung hình cho source1->source6`
    "feature_maps": [38, 19, 10, 5, 3, 1],
    "steps": [8, 16, 32, 64, 100, 300],  # Size of default box
    "min_size": [30, 60, 111, 162, 213, 264],  # Size of default box
    "max_size": [60, 111, 162, 213, 264, 315],  # Size of default box
    "aspect_ratios": [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
}

net = SSD(phase="inference", cfg=cfg)
net_weights = torch.load("./data/weights/ssd300_100.pth",
                         map_location={"cuda:0": "cpu"})
net.load_state_dict(net_weights)


def show_predict(img_file_path):
    img = cv2.imread(img_file_path)

    color_mean = (104, 117, 123)
    input_size = 300
    transform = DataTransform(input_size, color_mean)

    phase = "val"
    img_tranformed, boxes, labels = transform(img, phase, "", "")
    img_tensor = torch.from_numpy(img_tranformed[:, :,
示例#20
0
# 3. data set 정의
transforms_list = photometric_distort()
transform = transforms.Compose(transforms_list)
root_dir = "D:\Data\\voc\\2012"

train_set = loader.VOC_loader(root_dir, transform=transform)

# 4. data loader 정의
train_loader = torch.utils.data.DataLoader(train_set,
                                           batch_size=32,
                                           collate_fn=train_set.collate_fn,
                                           shuffle=True,
                                           num_workers=0)

# 5. model 정의
net = SSD().to(device)
net.train()

# 6. loss 정의
criterion = MultiBoxLoss().to(device)

# 7. optimizer 정의
optimizer = optim.Adam(net.parameters(), lr=0.001)
total_step = len(train_loader)

# 8. train
for epoch in range(30):

    epoch_time = time.time()
    for i, (images, labels) in enumerate(train_loader):
示例#21
0
dataloader_dict = {"train": train_dataloader, "val": val_dataloader}

# network
cfg = {
    "num_classes": 21,  #VOC data include 20 class + 1 background class
    "input_size": 300,  #SSD300
    "bbox_aspect_num": [4, 6, 6, 6, 4,
                        4],  # Tỷ lệ khung hình cho source1->source6`
    "feature_maps": [38, 19, 10, 5, 3, 1],
    "steps": [8, 16, 32, 64, 100, 300],  # Size of default box
    "min_size": [30, 60, 111, 162, 213, 264],  # Size of default box
    "max_size": [60, 111, 162, 213, 264, 315],  # Size of default box
    "aspect_ratios": [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
}

net = SSD(phase="train", cfg=cfg)
vgg_weights = torch.load("./data/weights/vgg16_reducedfc.pth")
net.vgg.load_state_dict(vgg_weights)


def weights_init(m):
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight.data)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.0)


# He init
net.extras.apply(weights_init)
net.loc.apply(weights_init)
net.conf.apply(weights_init)
示例#22
0
def train(seed, resume=False, opt_level='O1', resize_dims=(300, 300)):
    clear_cuda()
    torch.manual_seed(seed)
    data_folder = "./output/"
    keep_difficult = True
    n_classes = len(label_map)

    other_checkpoint = data_folder+"oth_checkpoint.pt"
    model_checkpoint = data_folder+"checkpoint.pt"
    early_stopping = EarlyStopping(save_model_name=model_checkpoint, patience=3)
    pin_memory = True

    batch_size = 8
    accumulation_factor = 6
    iterations = 100000
    workers = 4*torch.cuda.device_count()
    lr = 1e-3
    decay_lr_at = [80000]
    decay_lr_to = 0.2

    early_stopper_lr_decrease = 0.5
    early_stopper_lr_decrease_count = 7

    momentum = 0.9
    weight_decay = 5e-3
    grad_clip = None
    torch.backends.cudnn.benchmark = True

    optimizer = None
    start_epoch = None

    history = pd.DataFrame()
    history.index.name="Epoch"
    history_path = "./output/HISTORY"+"_SEED_{}".format(seed)+".csv"
    model = SSD(n_classes)
    biases = list()
    not_biases = list()

    for param_name, param in model.named_parameters():
        if param.requires_grad:
            if param_name.endswith('.bias'):
                biases.append(param)
            else:
                not_biases.append(param)

    # optimizer = RAdam(params=[{'params':biases,'lr':2*lr},{'params':not_biases}], lr=lr, weight_decay=weight_decay)
    # optimizer = torch.optim.RMSprop(params=[{'params':biases,'lr':2*lr},{'params':not_biases}], lr=lr, weight_decay=weight_decay, momentum=momentum)
    # optimizer = torch.optim.SGD(params=[{'params':biases,'lr':2*lr},{'params':not_biases}], lr=lr, weight_decay=weight_decay, momentum=momentum)
    optimizer = apex.optimizers.FusedLAMB(params=[{'params':biases,'lr':2*lr},{'params':not_biases}], lr=lr, weight_decay=weight_decay)#, momentum=momentum)
    
    model = model.to(device)
    criterion = MultiBoxLoss(model.priors_cxcy).to(device)
    model, optimizer = amp.initialize(model, optimizer, opt_level=opt_level, loss_scale=1.0)

    start_epoch = 0
    

    if resume and os.path.exists(other_checkpoint):
        ocheckpt = torch.load(other_checkpoint, map_location=device)
        optimizer.load_state_dict(ocheckpt['optimizer_state'])
        start_epoch = ocheckpt['epoch'] + 1
        lr = get_lr(optimizer)
        history = pd.read_csv(history_path, index_col="Epoch")
        model.load_state_dict(torch.load(model_checkpoint, map_location=device))
        # adjust_learning_rate(optimizer, 0.1)
        lr = get_lr(optimizer)

        for state in optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.cuda()

        amp.load_state_dict(ocheckpt['amp_state'])





    train_dataset = PascalDataset(data_folder, split="train", keep_difficult=keep_difficult, resize_dims=resize_dims)
    train_len = int(0.85*len(train_dataset))
    valid_len = len(train_dataset) - train_len

    train_data, valid_data = torch.utils.data.dataset.random_split(train_dataset, [train_len, valid_len])
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True, collate_fn=train_dataset.collate_fn,
                                               num_workers=workers, pin_memory=pin_memory)
    valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=batch_size, shuffle=False, collate_fn=train_dataset.collate_fn,
                                               num_workers=workers, pin_memory=pin_memory)



    

    total_epochs = iterations//(len(train_dataset)//32)
    decay_lr_at = [it//(len(train_dataset)//32) for it in decay_lr_at]

    total_epochs = 125
    decay_lr_at = [100]


    print("Training For: {}".format(total_epochs))
    print("Decay LR at:", decay_lr_at)

    for epoch in range(start_epoch, total_epochs):
        if epoch in decay_lr_at:
            adjust_learning_rate(optimizer, decay_lr_to)
            lr*=decay_lr_to
            print("Learning Rate Adjusted")
        st = time.time()

        print("EPOCH: {}/{} -- Current LR: {}".format(epoch+1, total_epochs, lr))
        clear_cuda()
        tl,ta = train_single_epoch(epoch, model, train_loader, optimizer, criterion, plot=False, clip_grad=grad_clip, accumulation_factor=accumulation_factor)
        clear_cuda()
        vl, va = evaluate(model, valid_loader, criterion)


        print_epoch_stat(epoch, time.time()-st, history=history, train_loss=tl, valid_loss=vl)
        early_stopping(tl, model)
        other_state = {
            'epoch': epoch,
            'optimizer_state': optimizer.state_dict(),
            'amp_state': amp.state_dict()
            }

        torch.save(other_state, other_checkpoint)

        history.loc[epoch, "LR"] = lr
        history.to_csv(history_path)

        if early_stopping.early_stop:
            if early_stopper_lr_decrease_count>0:
                early_stopper_lr_decrease_count = early_stopper_lr_decrease_count-1
                adjust_learning_rate(optimizer, early_stopper_lr_decrease)
                early_stopping.early_stop = False
                early_stopping.counter = 0
                lr*=early_stopper_lr_decrease
                print("Learning Rate Adjusted")
                accumulation_factor*=2
            else:
                break
示例#23
0
    if False:
        batch = train_data.next()
        images = batch.data[0][:].as_in_context(mx.gpu(0))
        labels = batch.label[0][:].as_in_context(mx.gpu(0))
        show_images(images.asnumpy(),
                    labels.asnumpy(),
                    rgb_mean,
                    std,
                    show_text=True,
                    fontsize=6,
                    MN=(2, 4))
        print(labels.shape)

    #2. net initialize
    net = SSD(1, verbose=False, prefix='ssd_')
    # net.hybridize() # MultiBoxPrior cannot support symbol
    # print(net)
    # tic = time.time()
    # anchors,box_preds,cls_preds = net(images)
    # print(time.time()-tic)
    # print(net)
    #MultiBoxTraget 作用是将生成的anchors与哪些ground truth对应,提取出anchors的偏移和对应的类型
    #预测的误差是每次网络的预测框g与anchors的差分别/anchor[xywh],然后作为smoothL1(label-g)解算,g才是预测
    # box_offset,box_mask,cls_labels = MultiBoxTarget(anchors,batch.label[0],cls_preds)
    # box_offset, box_mask, cls_labels = MultiBoxTarget(anchors, batch.label[0].as_in_context(mx.gpu(0)),
    #                                                   cls_preds.transpose((0, 2, 1)))

    #3. loss define
    cls_loss = FocalLoss()  # predict
    box_loss = SmoothL1Loss()  # regression