示例#1
0
def train(opt):
    """Train EfficientDet on COCO, keeping the best checkpoint and an ONNX export.

    Runs ``opt.num_epochs`` epochs over the ``train2017`` split and logs the
    running losses to TensorBoard. Every ``opt.test_interval`` epochs the
    model is scored on ``val2017``; when the validation loss improves by more
    than ``opt.es_min_delta`` the whole model object is saved and exported to
    ONNX. Training stops early once no improvement has been seen for more
    than ``opt.es_patience`` epochs (a patience of 0 disables early stopping).

    Args:
        opt: Options object. Attributes read here: ``batch_size``,
            ``data_path``, ``log_path``, ``saved_path``, ``lr``,
            ``num_epochs``, ``test_interval``, ``es_min_delta`` and
            ``es_patience``.

    Side effects:
        Deletes and recreates ``opt.log_path``, creates ``opt.saved_path`` if
        it is missing, and writes ``signatrix_efficientdet_coco.pth`` and
        ``signatrix_efficientdet_coco.onnx`` into it.
    """
    use_cuda = torch.cuda.is_available()
    num_gpus = 1
    if use_cuda:
        num_gpus = torch.cuda.device_count()
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    training_params = {
        # Global batch grows with the GPU count; nn.DataParallel splits it
        # across the available devices.
        "batch_size": opt.batch_size * num_gpus,
        "shuffle": True,
        "drop_last": True,
        "collate_fn": collater,
        "num_workers": 12,
    }

    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False,
        "collate_fn": collater,
        "num_workers": 12,
    }

    training_set = CocoDataset(root_dir=opt.data_path,
                               set="train2017",
                               transform=transforms.Compose(
                                   [Normalizer(),
                                    Augmenter(),
                                    Resizer()]))
    training_generator = DataLoader(training_set, **training_params)

    test_set = CocoDataset(root_dir=opt.data_path,
                           set="val2017",
                           transform=transforms.Compose(
                               [Normalizer(), Resizer()]))
    test_generator = DataLoader(test_set, **test_params)

    model = EfficientDet(num_classes=training_set.num_classes())

    # Start every run with an empty TensorBoard directory.
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    writer = SummaryWriter(opt.log_path)
    try:
        if use_cuda:
            model = model.cuda()
            model = nn.DataParallel(model)

        optimizer = torch.optim.Adam(model.parameters(), opt.lr)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               patience=3,
                                                               verbose=True)

        def compute_losses(batch):
            """Forward one batch and return scalar (cls_loss, reg_loss)."""
            images = batch['img']
            annotations = batch['annot']
            if use_cuda:
                images = images.cuda()
                annotations = annotations.cuda()
            cls_loss, reg_loss = model([images.float(), annotations])
            # The model may return one value per DataParallel replica;
            # average down to a scalar.
            return cls_loss.mean(), reg_loss.mean()

        best_loss = 1e5
        best_epoch = 0
        model.train()

        num_iter_per_epoch = len(training_generator)
        for epoch in range(opt.num_epochs):
            model.train()
            epoch_loss = []
            progress_bar = tqdm(training_generator)
            for step, data in enumerate(progress_bar):
                try:
                    optimizer.zero_grad()
                    cls_loss, reg_loss = compute_losses(data)
                    loss = cls_loss + reg_loss
                    # A zero loss carries no gradient signal; skip the step.
                    if loss == 0:
                        continue
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()
                    epoch_loss.append(float(loss))
                    total_loss = np.mean(epoch_loss)

                    progress_bar.set_description(
                        'Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Batch loss: {:.5f} Total loss: {:.5f}'
                        .format(epoch + 1, opt.num_epochs, step + 1,
                                num_iter_per_epoch, cls_loss, reg_loss, loss,
                                total_loss))
                    global_step = epoch * num_iter_per_epoch + step
                    writer.add_scalar('Train/Total_loss', total_loss,
                                      global_step)
                    writer.add_scalar('Train/Regression_loss', reg_loss,
                                      global_step)
                    writer.add_scalar('Train/Classfication_loss (focal loss)',
                                      cls_loss, global_step)

                except Exception as e:
                    # Deliberate best-effort: one failing batch must not
                    # abort the whole run. Report it and move on.
                    print(e)
                    continue

            if epoch_loss:
                scheduler.step(np.mean(epoch_loss))
            else:
                # Every batch was skipped or failed: np.mean([]) is NaN and
                # would be fed to the plateau scheduler as a metric.
                print('Epoch {}: no batch produced a loss; skipping LR '
                      'scheduler step'.format(epoch + 1))

            if epoch % opt.test_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                for data in test_generator:
                    with torch.no_grad():
                        cls_loss, reg_loss = compute_losses(data)
                    loss_classification_ls.append(float(cls_loss))
                    loss_regression_ls.append(float(reg_loss))

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    'Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                    .format(epoch + 1, opt.num_epochs, cls_loss, reg_loss,
                            loss))
                writer.add_scalar('Test/Total_loss', loss, epoch)
                writer.add_scalar('Test/Regression_loss', reg_loss, epoch)
                writer.add_scalar('Test/Classfication_loss (focal loss)',
                                  cls_loss, epoch)

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch
                    # Saves the whole (possibly DataParallel-wrapped) module
                    # object, not just its state_dict.
                    torch.save(
                        model,
                        os.path.join(opt.saved_path,
                                     "signatrix_efficientdet_coco.pth"))

                    dummy_input = torch.rand(opt.batch_size, 3, 512, 512)
                    if use_cuda:
                        dummy_input = dummy_input.cuda()
                    # Export the bare network, never the DataParallel wrapper.
                    export_model = (model.module
                                    if isinstance(model, nn.DataParallel)
                                    else model)
                    backbone = export_model.backbone_net.model
                    # Memory-efficient swish is switched off for the export
                    # (presumably the exporter cannot trace it - TODO confirm)
                    # and always restored, even if the export fails.
                    backbone.set_swish(memory_efficient=False)
                    try:
                        torch.onnx.export(
                            export_model,
                            dummy_input,
                            os.path.join(opt.saved_path,
                                         "signatrix_efficientdet_coco.onnx"),
                            verbose=False,
                            opset_version=11)
                    finally:
                        backbone.set_swish(memory_efficient=True)

                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    # Report the best loss seen, not the current epoch's loss.
                    print(
                        "Stop training at epoch {}. The lowest loss achieved is {}"
                        .format(epoch, best_loss))
                    break
    finally:
        # Flush and release the TensorBoard event file even on failure.
        writer.close()
def train(opt):
    """Train a three-loss EfficientDet variant on COCO (GPU only).

    The model returns a classification loss, a second classification loss
    and a regression loss; their sum is optimised with AdamW. Every
    ``opt.test_interval`` epochs the model is scored on ``val2017`` and, when
    the validation loss improves by more than ``opt.es_min_delta``, the
    unwrapped model's ``state_dict`` is saved to
    ``<opt.saved_path>/<opt.network>.pth``.

    Args:
        opt: Options object. Attributes read here: ``batch_size``,
            ``data_path``, ``resume``, ``saved_path``, ``network``, ``lr``,
            ``num_epochs``, ``test_interval``, ``es_min_delta`` and
            ``es_patience``. ``opt.num_classes`` is set from the training
            set before ``opt`` is handed to ``EfficientDet``.

    Raises:
        RuntimeError: If no CUDA device is available.
    """
    if not torch.cuda.is_available():
        # RuntimeError is still caught by callers using ``except Exception``.
        raise RuntimeError('no GPU')
    num_gpus = torch.cuda.device_count()

    cudnn.benchmark = True

    training_params = {
        # Global batch grows with the GPU count; nn.DataParallel splits it
        # across the available devices.
        "batch_size": opt.batch_size * num_gpus,
        "shuffle": True,
        "drop_last": True,
        "collate_fn": collater,
        "num_workers": 12,
    }

    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False,
        "collate_fn": collater,
        "num_workers": 12,
    }

    training_set = CocoDataset(root_dir=opt.data_path,
                               set="train2017",
                               transform=transforms.Compose(
                                   [Normalizer(),
                                    Augmenter(),
                                    Resizer()]))
    training_generator = DataLoader(training_set, **training_params)

    test_set = CocoDataset(root_dir=opt.data_path,
                           set="val2017",
                           transform=transforms.Compose(
                               [Normalizer(), Resizer()]))
    test_generator = DataLoader(test_set, **test_params)

    opt.num_classes = training_set.num_classes()

    model = EfficientDet(opt)
    checkpoint_path = os.path.join(opt.saved_path, opt.network + '.pth')
    if opt.resume:
        print('Loading model...')
        model.load_state_dict(torch.load(checkpoint_path))

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    model = model.cuda()
    model = nn.DataParallel(model)

    optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    def compute_losses(batch):
        """Forward one batch and return scalar (cls, cls_2, reg) losses."""
        # CUDA availability was verified on entry, so there is no CPU path.
        cls_loss, cls_2_loss, reg_loss = model(
            [batch['img'].cuda().float(), batch['annot'].cuda()])
        # The model may return one value per DataParallel replica; average
        # down to a scalar.
        return cls_loss.mean(), cls_2_loss.mean(), reg_loss.mean()

    best_loss = 1e5
    best_epoch = 0
    model.train()

    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epochs):
        print('Epoch: {}/{}:'.format(epoch + 1, opt.num_epochs))
        model.train()
        epoch_loss = []
        progress_bar = tqdm(training_generator)
        for step, data in enumerate(progress_bar):
            try:
                optimizer.zero_grad()
                cls_loss, cls_2_loss, reg_loss = compute_losses(data)
                loss = cls_loss + cls_2_loss + reg_loss
                # A zero loss carries no gradient signal; skip the step.
                if loss == 0:
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                optimizer.step()
                epoch_loss.append(float(loss))
                total_loss = np.mean(epoch_loss)

                progress_bar.set_description(
                    'Epoch: {}/{}. Iteration: {}/{}'.format(
                        epoch + 1, opt.num_epochs, step + 1,
                        num_iter_per_epoch))

                progress_bar.write(
                    'Cls loss: {:.5f}\tReg loss: {:.5f}\tCls+Reg loss: {:.5f}\tBatch loss: {:.5f}\tTotal loss: {:.5f}'
                    .format(cls_loss, reg_loss, cls_loss + reg_loss, loss,
                            total_loss))

            except Exception as e:
                # Deliberate best-effort: one failing batch must not abort
                # the whole run. Report it and move on.
                print(e)
                continue

        if epoch_loss:
            scheduler.step(np.mean(epoch_loss))
        else:
            # Every batch was skipped or failed: np.mean([]) is NaN and would
            # be fed to the plateau scheduler as a metric.
            print('Epoch {}: no batch produced a loss; skipping LR scheduler '
                  'step'.format(epoch + 1))

        if epoch % opt.test_interval == 0:
            model.eval()
            loss_regression_ls = []
            loss_classification_ls = []
            loss_classification_2_ls = []
            progress_bar = tqdm(test_generator)
            progress_bar.set_description_str(' Evaluating')
            for data in progress_bar:
                with torch.no_grad():
                    cls_loss, cls_2_loss, reg_loss = compute_losses(data)
                loss_classification_ls.append(float(cls_loss))
                loss_classification_2_ls.append(float(cls_2_loss))
                loss_regression_ls.append(float(reg_loss))

            cls_loss = np.mean(loss_classification_ls)
            cls_2_loss = np.mean(loss_classification_2_ls)
            reg_loss = np.mean(loss_regression_ls)
            loss = cls_loss + cls_2_loss + reg_loss

            print(
                'Epoch: {}/{}. \nClassification loss: {:1.5f}. \tClassification_2 loss: {:1.5f}. \tRegression loss: {:1.5f}. \tTotal loss: {:1.5f}'
                .format(epoch + 1, opt.num_epochs, cls_loss, cls_2_loss,
                        reg_loss, loss))

            if loss + opt.es_min_delta < best_loss:
                print('Saving model...')
                best_loss = loss
                best_epoch = epoch
                # Save the unwrapped weights so they load without
                # DataParallel (matches the resume path above).
                torch.save(model.module.state_dict(), checkpoint_path)

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                # Report the best loss seen, not the current epoch's loss.
                print(
                    "Stop training at epoch {}. The lowest loss achieved is {}"
                    .format(epoch, best_loss))
                break
def test(opt):
    """Score a trained EfficientDet on COCO ``val2017`` with a mean-IoU metric.

    Loads ``<opt.pretrained_model>/<opt.network>.pth``, runs inference on the
    GPU and prints the average per-image score. Per image:

    * no predictions and no annotations -> 1.0;
    * predictions xor annotations missing -> 0.0;
    * otherwise the mean over classes of the value returned by ``iou`` for
      the above-threshold predictions and the ground truth of that class,
      with 0 for ground-truth classes that have no prediction and 0 for
      predicted classes absent from the ground truth.

    When the module-level ``writePIC`` flag is truthy, each image that has at
    least one prediction is written to ``opt.prediction_dir`` with predicted
    and ground-truth boxes drawn on it.

    Args:
        opt: Options object. Attributes read here: ``data_path``,
            ``batch_size``, ``pretrained_model``, ``network``,
            ``prediction_dir`` and ``cls_threshold``. ``opt.num_classes`` is
            set and ``opt.batch_size`` is multiplied by 4 in place.

    Side effects:
        Deletes and recreates ``opt.prediction_dir``.
    """
    test_set = CocoDataset(opt.data_path,
                           set='val2017',
                           transform=transforms.Compose(
                               [Normalizer(), Resizer()]))
    opt.num_classes = test_set.num_classes()
    # NOTE(review): this mutates the caller's options object, so calling
    # test() twice with the same ``opt`` grows the batch size again. Kept
    # because EfficientDet(opt) below may read opt.batch_size - confirm.
    opt.batch_size = opt.batch_size * 4
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False,
        "collate_fn": collater,
        "num_workers": 12,
    }
    test_generator = DataLoader(test_set, **test_params)

    model = EfficientDet(opt)
    model.load_state_dict(
        torch.load(os.path.join(opt.pretrained_model, opt.network + '.pth')))
    model.cuda()
    model.set_is_training(False)
    model.eval()

    # Start with an empty output directory.
    if os.path.isdir(opt.prediction_dir):
        shutil.rmtree(opt.prediction_dir)
    os.makedirs(opt.prediction_dir)

    progress_bar = tqdm(test_generator)
    progress_bar.set_description_str(' Evaluating')
    IoU_scores = []
    for i, data in enumerate(progress_bar):
        scale = data['scale']
        with torch.no_grad():
            output_list = model(data['img'].cuda().float())

        for j, output in enumerate(output_list):
            scores, labels, boxes = output
            annot = data['annot'][j]
            # Rows whose class id is -1 are dropped (presumably batch
            # padding added by the collate function - TODO confirm).
            annot = annot[annot[:, 4] != -1]
            if boxes.shape[0] == 0:
                # No detections: perfect only if nothing was annotated.
                if annot.size(0) == 0:
                    IoU_scores.append(1.0)
                else:
                    IoU_scores.append(0.0)
                continue
            if annot.size(0) == 0:
                IoU_scores.append(0.0)
            else:
                classes = set(annot[:, 4].tolist())
                # Rows of ``cat`` are [score, label, box...].
                cat = torch.cat(
                    [scores.view(-1, 1),
                     labels.view(-1, 1).float(), boxes],
                    dim=1)
                cat = cat[cat[:, 0] >= opt.cls_threshold]
                iou_score = []
                for c in classes:
                    box = cat[cat[:, 1] == c][:, 2:]
                    if box.size(0) == 0:
                        iou_score.append(0.0)
                        continue
                    tgt = annot[annot[:, 4] == c][:, :4]
                    # NOTE(review): assumes ``iou`` returns a tensor that
                    # reduces to one score per class - confirm.
                    iou_s = iou(box, tgt.cuda())
                    iou_score.append(iou_s.cpu().numpy())
                classes_pre = set(cat[:, 1].tolist())
                # Penalise predicted classes that are not in the ground truth.
                for c in classes_pre:
                    if c not in classes:
                        iou_score.append(0)
                IoU_scores.append(sum(iou_score) / len(iou_score))

            if writePIC:
                # Undo the resize so boxes line up with the original image.
                annot /= scale[j]
                boxes /= scale[j]
                image_info = test_set.coco.loadImgs(
                    test_set.image_ids[i * opt.batch_size + j])[0]
                path = os.path.join(test_set.root_dir, 'images',
                                    test_set.set_name, image_info['file_name'])
                output_image = cv2.imread(path)
                for box_id in range(boxes.shape[0]):
                    pred_prob = float(scores[box_id])
                    # Stops at the first low score, i.e. relies on detections
                    # being ordered by descending score.
                    if pred_prob < opt.cls_threshold:
                        break
                    pred_label = int(labels[box_id])
                    # OpenCV drawing calls take integer pixel coordinates,
                    # so convert the tensor values explicitly.
                    xmin, ymin, xmax, ymax = [
                        int(v) for v in boxes[box_id, :].tolist()
                    ]
                    color = colors[pred_label]
                    caption = COCO_CLASSES[pred_label] + ' : %.2f' % pred_prob
                    cv2.rectangle(output_image, (xmin, ymin), (xmax, ymax),
                                  color, 1)
                    text_size = cv2.getTextSize(caption,
                                                cv2.FONT_HERSHEY_PLAIN, 1,
                                                1)[0]

                    # Filled background behind the caption.
                    cv2.rectangle(
                        output_image, (xmin, ymin),
                        (xmin + text_size[0] + 3, ymin + text_size[1] + 4),
                        color, -1)
                    cv2.putText(output_image, caption,
                                (xmin, ymin + text_size[1] + 4),
                                cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)
                for box_id in range(annot.size(0)):
                    xmin, ymin, xmax, ymax = [
                        int(v) for v in annot[box_id, :4].tolist()
                    ]
                    cv2.rectangle(output_image, (xmin, ymin), (xmax, ymax),
                                  (255, 0, 0), 1)

                # splitext copes with extensions of any length.
                cv2.imwrite(
                    "{}/{}_prediction.jpg".format(
                        opt.prediction_dir,
                        os.path.splitext(image_info["file_name"])[0]),
                    output_image)

    if IoU_scores:
        print(sum(IoU_scores) / len(IoU_scores))
    else:
        # An empty dataset would otherwise raise ZeroDivisionError.
        print('No images were evaluated; mean IoU is undefined.')
示例#4
0
def train(opt):
    """Train EfficientDet on COCO with a selectable backbone and resume support.

    Runs epochs ``start_epoch .. opt.num_epochs - 1`` over ``train2017`` and
    logs running losses to TensorBoard. Every ``opt.test_interval`` epochs
    the model is scored on ``val2017`` and a ``..._best_epoch<N>.pth``
    checkpoint is written when the loss improves by more than
    ``opt.es_min_delta``. Every ``opt.save_interval`` epochs the model is
    also saved as ``signatrix_efficientdet_coco_latest.pth``, which is the
    file ``opt.resume`` loads.

    Args:
        opt: Options object. Attributes read here: ``batch_size``,
            ``data_path``, ``log_path``, ``saved_path``, ``resume``,
            ``start_epoch``, ``backbone_network``, ``remote_loading``,
            ``advprop``, ``lr``, ``num_epochs``, ``test_interval``,
            ``save_interval``, ``es_min_delta`` and ``es_patience``. When
            resuming, ``opt.lr`` is rescaled in place.

    Side effects:
        Deletes and recreates ``opt.log_path`` (also when resuming), creates
        ``opt.saved_path`` if missing and writes checkpoints into it.
    """
    use_cuda = torch.cuda.is_available()
    num_gpus = 1
    if use_cuda:
        num_gpus = torch.cuda.device_count()
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    training_params = {
        # Global batch grows with the GPU count; nn.DataParallel splits it
        # across the available devices.
        "batch_size": opt.batch_size * num_gpus,
        "shuffle": True,
        "drop_last": True,
        "collate_fn": collater,
        "num_workers": 12,
    }

    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False,
        "collate_fn": collater,
        "num_workers": 12,
    }

    training_set = CocoDataset(root_dir=opt.data_path,
                               set="train2017",
                               transform=transforms.Compose(
                                   [Normalizer(),
                                    Augmenter(),
                                    Resizer()]))
    training_generator = DataLoader(training_set, **training_params)

    test_set = CocoDataset(root_dir=opt.data_path,
                           set="val2017",
                           transform=transforms.Compose(
                               [Normalizer(), Resizer()]))
    test_generator = DataLoader(test_set, **test_params)

    # Passed to EfficientDet as ``conv_in_channels`` for the chosen backbone.
    channels_map = {
        'efficientnet-b0': [40, 80, 192],
        'efficientnet-b1': [40, 80, 192],
        'efficientnet-b2': [48, 88, 208],
        'efficientnet-b3': [48, 96, 232],
        'efficientnet-b4': [56, 112, 272],
        'efficientnet-b5': [64, 128, 304],
        'efficientnet-b6': [72, 144, 344],
        'efficientnet-b7': [80, 160, 384],
        'efficientnet-b8': [80, 160, 384],
    }

    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    latest_path = os.path.join(opt.saved_path,
                               'signatrix_efficientdet_coco_latest.pth')

    writer = SummaryWriter(opt.log_path)
    try:
        if opt.resume:
            resume_path = latest_path
            # NOTE: torch.load unpickles a whole model object; only resume
            # from checkpoints you trust.
            checkpoint = torch.load(resume_path)
            # Checkpoints written on a GPU run are DataParallel-wrapped,
            # those written on CPU are the bare model; accept both.
            if isinstance(checkpoint, nn.DataParallel):
                model = checkpoint.module
            else:
                model = checkpoint
            print("model loaded from {}".format(resume_path))
        else:
            model = EfficientDet(
                num_classes=training_set.num_classes(),
                network=opt.backbone_network,
                remote_loading=opt.remote_loading,
                advprop=opt.advprop,
                conv_in_channels=channels_map[opt.backbone_network])
            print("model created with backbone {}, advprop {}".format(
                opt.backbone_network, opt.advprop))

        if use_cuda:
            model = model.cuda()
            model = nn.DataParallel(model)

        start_epoch = 0
        if opt.resume:
            start_epoch = opt.start_epoch
            # Resume with the LR cut tenfold per ~100 epochs already trained.
            m = round(opt.start_epoch / 100)
            opt.lr = opt.lr * (0.1**m)
        optimizer = torch.optim.Adam(model.parameters(), opt.lr)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               patience=3,
                                                               verbose=True)

        def compute_losses(batch):
            """Forward one batch and return scalar (cls_loss, reg_loss)."""
            images = batch['img']
            annotations = batch['annot']
            if use_cuda:
                images = images.cuda()
                annotations = annotations.cuda()
            cls_loss, reg_loss = model([images.float(), annotations])
            # The model may return one value per DataParallel replica;
            # average down to a scalar.
            return cls_loss.mean(), reg_loss.mean()

        best_loss = 1e5
        # Count early-stopping patience from where this run starts, not
        # from epoch 0, so a resumed run is not stopped prematurely.
        best_epoch = start_epoch
        model.train()

        num_iter_per_epoch = len(training_generator)

        for epoch in range(start_epoch, opt.num_epochs):
            model.train()
            epoch_loss = []
            progress_bar = tqdm(training_generator)
            for step, data in enumerate(progress_bar):
                try:
                    optimizer.zero_grad()
                    cls_loss, reg_loss = compute_losses(data)
                    loss = cls_loss + reg_loss
                    # A zero loss carries no gradient signal; skip the step.
                    if loss == 0:
                        continue
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()
                    epoch_loss.append(float(loss))
                    total_loss = np.mean(epoch_loss)

                    progress_bar.set_description(
                        '{} Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Batch loss: {:.5f} Total loss: {:.5f}'
                        .format(datetime.now(), epoch + 1, opt.num_epochs,
                                step + 1, num_iter_per_epoch, cls_loss,
                                reg_loss, loss, total_loss))
                    global_step = epoch * num_iter_per_epoch + step
                    writer.add_scalar('Train/Total_loss', total_loss,
                                      global_step)
                    writer.add_scalar('Train/Regression_loss', reg_loss,
                                      global_step)
                    writer.add_scalar('Train/Classfication_loss (focal loss)',
                                      cls_loss, global_step)

                except Exception as e:
                    # Deliberate best-effort: one failing batch must not
                    # abort the whole run. Report it and move on.
                    print(e)
                    continue

            if epoch_loss:
                scheduler.step(np.mean(epoch_loss))
            else:
                # Every batch was skipped or failed: np.mean([]) is NaN and
                # would be fed to the plateau scheduler as a metric.
                print('Epoch {}: no batch produced a loss; skipping LR '
                      'scheduler step'.format(epoch + 1))

            if epoch % opt.test_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                for data in test_generator:
                    with torch.no_grad():
                        cls_loss, reg_loss = compute_losses(data)
                    loss_classification_ls.append(float(cls_loss))
                    loss_regression_ls.append(float(reg_loss))

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    '{} Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                    .format(datetime.now(), epoch + 1, opt.num_epochs,
                            cls_loss, reg_loss, loss))
                writer.add_scalar('Test/Total_loss', loss, epoch)
                writer.add_scalar('Test/Regression_loss', reg_loss, epoch)
                writer.add_scalar('Test/Classfication_loss (focal loss)',
                                  cls_loss, epoch)

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch
                    # One file per improving epoch; earlier bests are kept.
                    torch.save(
                        model,
                        os.path.join(
                            opt.saved_path,
                            "signatrix_efficientdet_coco_best_epoch{}.pth".
                            format(epoch)))

                print("epoch:", epoch, "best_epoch:", best_epoch,
                      "epoch - best_epoch=", epoch - best_epoch)
                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    # Report the best loss seen, not the current epoch's loss.
                    print(
                        "Stop training at epoch {}. The lowest loss achieved is {}"
                        .format(epoch, best_loss))
                    break

            # Rolling checkpoint consumed by ``opt.resume``.
            if epoch % opt.save_interval == 0:
                torch.save(model, latest_path)
    finally:
        # Flush and release the TensorBoard event file even on failure.
        writer.close()