Example #1
from select import select, error as selecterror
from signal import signal, SIGINT, SIG_DFL
from socket import (socket, error as sockerr, has_ipv6,
                   AF_UNSPEC, AF_INET, AF_INET6, SOCK_DGRAM, IPPROTO_UDP)
from sys import exit, stderr
from time import time

# Local imports
from config import config, ConfigError
from config import log, LOG_ERROR, LOG_PRINT, LOG_VERBOSE, LOG_DEBUG
from db import dbconnect
# inet_pton isn't defined on windows, so use our own
from utils import inet_pton, stringtosockaddr, valid_addr

try:
    config.parse()
except ConfigError as err:
    # Note that we don't know how much user config is loaded at this stage
    log(LOG_ERROR, err)
    exit(1)

try:
    log_client, log_gamestat, db_id = dbconnect(config.db)
except ImportError as ex:
    def nodb(*args):
        '''This function is defined and used when the database import fails'''
        log(LOG_DEBUG, 'No database, not logged:', args)
    log_client = log_gamestat = nodb
    log(LOG_PRINT, 'Warning: database not available')
else:
    log(LOG_VERBOSE, db_id)
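
The `utils.inet_pton` import exists because `socket.inet_pton` historically wasn't available on Windows. The project's actual helper isn't shown on this page; as a minimal sketch (illustrative name), an IPv4-only fallback can be built on the always-available `inet_aton`:

from socket import inet_aton, AF_INET

def inet_pton_ipv4_fallback(family, addr):
    '''IPv4-only stand-in for socket.inet_pton (illustrative, not the project's helper).'''
    if family == AF_INET:
        return inet_aton(addr)  # packed 4-byte form, as inet_pton would return
    raise NotImplementedError('this fallback only handles AF_INET')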
Example #2
def train(**kwargs):
    config.parse(kwargs)
    vis = Visualizer(port=2333, env=config.env)
    vis.log('Use config:')
    for k, v in config.__class__.__dict__.items():
        if not k.startswith('__'):
            vis.log(f"{k}: {getattr(config, k)}")

    # prepare data
    train_data = VB_Dataset(config.train_paths,
                            phase='train',
                            useRGB=config.useRGB,
                            usetrans=config.usetrans,
                            padding=config.padding,
                            balance=config.data_balance)
    val_data = VB_Dataset(config.test_paths,
                          phase='val',
                          useRGB=config.useRGB,
                          usetrans=config.usetrans,
                          padding=config.padding,
                          balance=False)
    print('Training Images:', len(train_data),
          'Validation Images:', len(val_data))
    dist = train_data.dist()
    print('Train Data Distribution:', dist, 'Val Data Distribution:',
          val_data.dist())

    train_dataloader = DataLoader(train_data,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=config.num_workers)
    val_dataloader = DataLoader(val_data,
                                batch_size=config.batch_size,
                                shuffle=False,
                                num_workers=config.num_workers)

    # prepare model
    # model = ResNet18(num_classes=config.num_classes)
    # model = Vgg16(num_classes=config.num_classes)
    # model = densenet_collapse(num_classes=config.num_classes)
    model = ShallowVgg(num_classes=config.num_classes)
    print(model)

    if config.load_model_path:
        model.load(config.load_model_path)
    if config.use_gpu:
        model.cuda()
    if config.parallel:
        model = torch.nn.DataParallel(
            model, device_ids=list(range(config.num_of_gpu)))

    # criterion and optimizer
    # weight = torch.Tensor([1/dist['0'], 1/dist['1'], 1/dist['2'], 1/dist['3']])
    # weight = torch.Tensor([1/dist['0'], 1/dist['1']])
    # weight = torch.Tensor([dist['1'], dist['0']])
    # weight = torch.Tensor([1, 10])
    # vis.log(f'loss weight: {weight}')
    # print('loss weight:', weight)
    # weight = weight.cuda()

    # criterion = torch.nn.CrossEntropyLoss()
    criterion = LabelSmoothing(size=config.num_classes, smoothing=0.1)
    # criterion = torch.nn.CrossEntropyLoss(weight=weight)
    # criterion = FocalLoss(gamma=4, alpha=None)

    lr = config.lr
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=config.weight_decay)

    # metric
    softmax = functional.softmax
    log_softmax = functional.log_softmax
    loss_meter = meter.AverageValueMeter()
    epoch_loss = meter.AverageValueMeter()
    train_cm = meter.ConfusionMeter(config.num_classes)
    train_AUC = meter.AUCMeter()

    previous_avgse = 0
    # previous_AUC = 0
    if config.parallel:
        save_model_dir = config.save_model_dir if config.save_model_dir else model.module.model_name
        save_model_name = config.save_model_name if config.save_model_name else model.module.model_name + '_best_model.pth'
    else:
        save_model_dir = config.save_model_dir if config.save_model_dir else model.model_name
        save_model_name = config.save_model_name if config.save_model_name else model.model_name + '_best_model.pth'
    save_epoch = 1  # records the epoch whose model performed best on the validation set
    # process_record = {'epoch_loss': [],  # training curves recorded for plotting
    #                   'train_avgse': [], 'train_se0': [], 'train_se1': [], 'train_se2': [], 'train_se3': [],
    #                   'val_avgse': [], 'val_se0': [], 'val_se1': [], 'val_se2': [], 'val_se3': []}
    process_record = {
        'epoch_loss': [],  # training curves recorded for plotting
        'train_avgse': [],
        'train_se0': [],
        'train_se1': [],
        'val_avgse': [],
        'val_se0': [],
        'val_se1': [],
        'train_AUC': [],
        'val_AUC': []
    }

    # train
    for epoch in range(config.max_epoch):
        print(
            f"epoch: [{epoch+1}/{config.max_epoch}] {config.save_model_name[:-4]} =================================="
        )
        epoch_loss.reset()
        train_cm.reset()
        train_AUC.reset()

        # train
        model.train()
        for i, (image, label, image_path) in tqdm(enumerate(train_dataloader)):
            loss_meter.reset()

            # prepare input
            if config.use_gpu:
                image = image.cuda()
                label = label.cuda()

            # go through the model
            score = model(image)

            # backpropagate
            optimizer.zero_grad()
            # loss = criterion(score, label)
            loss = criterion(log_softmax(score, dim=1), label)
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.item())
            epoch_loss.add(loss.item())
            train_cm.add(softmax(score, dim=1).data, label.data)
            positive_score = softmax(score, dim=1)[:, 1].detach().cpu().numpy()
            train_AUC.add(positive_score, label.data)

            if (i + 1) % config.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])

        # print result
        # train_se = [100. * train_cm.value()[0][0] / (train_cm.value()[0][0] + train_cm.value()[0][1] + train_cm.value()[0][2] + train_cm.value()[0][3]),
        #             100. * train_cm.value()[1][1] / (train_cm.value()[1][0] + train_cm.value()[1][1] + train_cm.value()[1][2] + train_cm.value()[1][3]),
        #             100. * train_cm.value()[2][2] / (train_cm.value()[2][0] + train_cm.value()[2][1] + train_cm.value()[2][2] + train_cm.value()[2][3]),
        #             100. * train_cm.value()[3][3] / (train_cm.value()[3][0] + train_cm.value()[3][1] + train_cm.value()[3][2] + train_cm.value()[3][3])]
        train_se = [
            100. * train_cm.value()[0][0] /
            (train_cm.value()[0][0] + train_cm.value()[0][1]),
            100. * train_cm.value()[1][1] /
            (train_cm.value()[1][0] + train_cm.value()[1][1])
        ]
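        # Equivalently, for any number of classes, per-class sensitivity is the
        # confusion-matrix diagonal over its row sum; a generic sketch:
        # cm = train_cm.value()
        # train_se = [100. * cm[c][c] / cm[c].sum() for c in range(config.num_classes)]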

        # validate
        model.eval()
        if (epoch + 1) % 1 == 0:
            val_cm, val_se, val_accuracy, val_AUC = val_2class(
                model, val_dataloader)

            if np.average(val_se) > previous_avgse:  # save when validation average sensitivity improves
                # if val_AUC.value()[0] > previous_AUC:  # save when validation AUC improves
                save_dir = os.path.join('checkpoints', save_model_dir,
                                        save_model_name.split('.')[0])
                os.makedirs(save_dir, exist_ok=True)
                if config.parallel:  # unwrap DataParallel before saving
                    model.module.save(os.path.join(save_dir, save_model_name))
                else:
                    model.save(os.path.join(save_dir, save_model_name))
                previous_avgse = np.average(val_se)
                # previous_AUC = val_AUC.value()[0]
                save_epoch = epoch + 1

            process_record['epoch_loss'].append(epoch_loss.value()[0])
            process_record['train_avgse'].append(np.average(train_se))
            process_record['train_se0'].append(train_se[0])
            process_record['train_se1'].append(train_se[1])
            # process_record['train_se2'].append(train_se[2])
            # process_record['train_se3'].append(train_se[3])
            process_record['train_AUC'].append(train_AUC.value()[0])
            process_record['val_avgse'].append(np.average(val_se))
            process_record['val_se0'].append(val_se[0])
            process_record['val_se1'].append(val_se[1])
            # process_record['val_se2'].append(val_se[2])
            # process_record['val_se3'].append(val_se[3])
            process_record['val_AUC'].append(val_AUC.value()[0])

            # vis.plot_many({'epoch_loss': epoch_loss.value()[0],
            #                'train_avgse': np.average(train_se), 'train_se0': train_se[0], 'train_se1': train_se[1], 'train_se2': train_se[2], 'train_se3': train_se[3],
            #                'val_avgse': np.average(val_se), 'val_se0': val_se[0], 'val_se1': val_se[1], 'val_se2': val_se[2], 'val_se3': val_se[3]})
            # vis.log(f"epoch: [{epoch+1}/{config.max_epoch}] =========================================")
            # vis.log(f"lr: {optimizer.param_groups[0]['lr']}, loss: {round(loss_meter.value()[0], 5)}")
            # vis.log(f"train_avgse: {round(np.average(train_se), 4)}, train_se0: {round(train_se[0], 4)}, train_se1: {round(train_se[1], 4)}, train_se2: {round(train_se[2], 4)}, train_se3: {round(train_se[3], 4)},")
            # vis.log(f"val_avgse: {round(np.average(val_se), 4)}, val_se0: {round(val_se[0], 4)}, val_se1: {round(val_se[1], 4)}, val_se2: {round(val_se[2], 4)}, val_se3: {round(val_se[3], 4)}")
            # vis.log(f'train_cm: {train_cm.value()}')
            # vis.log(f'val_cm: {val_cm.value()}')
            # print("lr:", optimizer.param_groups[0]['lr'], "loss:", round(epoch_loss.value()[0], 5))
            # print('train_avgse:', round(np.average(train_se), 4), 'train_se0:', round(train_se[0], 4), 'train_se1:', round(train_se[1], 4), 'train_se2:', round(train_se[2], 4), 'train_se3:', round(train_se[3], 4))
            # print('val_avgse:', round(np.average(val_se), 4), 'val_se0:', round(val_se[0], 4), 'val_se1:', round(val_se[1], 4), 'val_se2:', round(val_se[2], 4), 'val_se3:', round(val_se[3], 4))
            # print('train_cm:')
            # print(train_cm.value())
            # print('val_cm:')
            # print(val_cm.value())

            vis.plot_many({
                'epoch_loss': epoch_loss.value()[0],
                'train_avgse': np.average(train_se),
                'train_se0': train_se[0],
                'train_se1': train_se[1],
                'val_avgse': np.average(val_se),
                'val_se0': val_se[0],
                'val_se1': val_se[1],
                'train_AUC': train_AUC.value()[0],
                'val_AUC': val_AUC.value()[0]
            })
            vis.log(
                f"epoch: [{epoch + 1}/{config.max_epoch}] ========================================="
            )
            vis.log(
                f"lr: {optimizer.param_groups[0]['lr']}, loss: {round(loss_meter.value()[0], 5)}"
            )
            vis.log(
                f"train_avgse: {round(np.average(train_se), 4)}, train_se0: {round(train_se[0], 4)}, train_se1: {round(train_se[1], 4)}"
            )
            vis.log(
                f"val_avgse: {round(np.average(val_se), 4)}, val_se0: {round(val_se[0], 4)}, val_se1: {round(val_se[1], 4)}"
            )
            vis.log(f'train_AUC: {train_AUC.value()[0]}')
            vis.log(f'val_AUC: {val_AUC.value()[0]}')
            vis.log(f'train_cm: {train_cm.value()}')
            vis.log(f'val_cm: {val_cm.value()}')
            print("lr:", optimizer.param_groups[0]['lr'], "loss:",
                  round(epoch_loss.value()[0], 5))
            print('train_avgse:', round(np.average(train_se), 4), 'train_se0:',
                  round(train_se[0], 4), 'train_se1:', round(train_se[1], 4))
            print('val_avgse:', round(np.average(val_se), 4), 'val_se0:',
                  round(val_se[0], 4), 'val_se1:', round(val_se[1], 4))
            print('train_AUC:',
                  train_AUC.value()[0], 'val_AUC:',
                  val_AUC.value()[0])
            print('train_cm:')
            print(train_cm.value())
            print('val_cm:')
            print(val_cm.value())

            if os.path.exists(
                    os.path.join('checkpoints', save_model_dir,
                                 save_model_name.split('.')[0])):
                write_json(file=os.path.join('checkpoints', save_model_dir,
                                             save_model_name.split('.')[0],
                                             'process_record.json'),
                           content=process_record)

        # if (epoch+1) % 5 == 0:
        #     lr = lr * config.lr_decay
        #     for param_group in optimizer.param_groups:
        #         param_group['lr'] = lr

    vis.log(f"Best Epoch: {save_epoch}")
    print("Best Epoch:", save_epoch)
Example #3
def iter_train(**kwargs):
    config.parse(kwargs)

    # ============================================ Visualization =============================================
    # vis = Visualizer(port=2333, env=config.env)
    # vis.log('Use config:')
    # for k, v in config.__class__.__dict__.items():
    #     if not k.startswith('__'):
    #         vis.log(f"{k}: {getattr(config, k)}")

    # ============================================= Prepare Data =============================================
    train_data = VB_Dataset(config.train_paths,
                            phase='train',
                            num_classes=config.num_classes,
                            useRGB=config.useRGB,
                            usetrans=config.usetrans,
                            padding=config.padding,
                            balance=config.data_balance)
    val_data = VB_Dataset(config.test_paths,
                          phase='val',
                          num_classes=config.num_classes,
                          useRGB=config.useRGB,
                          usetrans=config.usetrans,
                          padding=config.padding,
                          balance=config.data_balance)
    train_dist, val_dist = train_data.dist(), val_data.dist()
    train_data_scale, val_data_scale = train_data.scale, val_data.scale
    print('Training Images:', len(train_data),
          'Validation Images:', len(val_data))
    print('Train Data Distribution:', train_dist, 'Val Data Distribution:',
          val_dist)

    train_dataloader = DataLoader(train_data,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=config.num_workers)
    val_dataloader = DataLoader(val_data,
                                batch_size=config.batch_size,
                                shuffle=False,
                                num_workers=config.num_workers)

    # ============================================= Prepare Model ============================================
    # model = ResNet18(num_classes=config.num_classes)
    # model = ResNet34(num_classes=config.num_classes)
    # model = ResNet50(num_classes=config.num_classes)
    model = Vgg16(num_classes=config.num_classes)
    # model = AlexNet(num_classes=config.num_classes)
    # model = densenet_collapse(num_classes=config.num_classes)
    # model = ShallowVgg(num_classes=config.num_classes)
    # model = CustomedNet(num_classes=config.num_classes)
    # model = DualNet(num_classes=config.num_classes)
    # model = SkipResNet18(num_classes=config.num_classes)
    # model = DensResNet18(num_classes=config.num_classes)
    # model = GuideResNet18(num_classes=config.num_classes)
    # print(model)

    if config.load_model_path:
        model.load(config.load_model_path)
    if config.use_gpu:
        model.cuda()
    if config.parallel:
        model = torch.nn.DataParallel(model,
                                      device_ids=list(range(
                                          config.num_of_gpu)))

    # =========================================== Criterion and Optimizer =====================================
    # weight = torch.Tensor([1/dist['0'], 1/dist['1'], 1/dist['2'], 1/dist['3']])
    # weight = torch.Tensor([1/dist['0'], 1/dist['1']])
    # weight = torch.Tensor([dist['1'], dist['0']])
    # weight = torch.Tensor([1, 10])
    # vis.log(f'loss weight: {weight}')
    # print('loss weight:', weight)
    # weight = weight.cuda()

    criterion = torch.nn.CrossEntropyLoss()
    # criterion = torch.nn.CrossEntropyLoss(weight=weight)
    # criterion = LabelSmoothing(size=config.num_classes, smoothing=0.2)
    # criterion = FocalLoss(gamma=4, alpha=None)

    lr = config.lr
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=config.weight_decay)

    # ================================================== Metrics ===============================================
    log_softmax = functional.log_softmax
    loss_meter = meter.AverageValueMeter()

    # ====================================== Saving and Recording Configuration =================================
    previous_AUC = 0
    previous_mAP = 0
    save_iter = 1  # records the iteration whose model performed best on the validation set
    if config.parallel:
        save_model_dir = config.save_model_dir if config.save_model_dir else model.module.model_name
        save_model_name = config.save_model_name if config.save_model_name else model.module.model_name + '_best_model.pth'
    else:
        save_model_dir = config.save_model_dir if config.save_model_dir else model.model_name
        save_model_name = config.save_model_name if config.save_model_name else model.model_name + '_best_model.pth'
    if config.num_classes == 2:  # binary classification
        process_record = {
            'loss': [],  # training curves recorded for plotting
            'train_avg': [],
            'train_sp': [],
            'train_se': [],
            'val_avg': [],
            'val_sp': [],
            'val_se': [],
            'train_AUC': [],
            'val_AUC': []
        }
    elif config.num_classes == 3:  # 3-class classification
        process_record = {
            'loss': [],  # training curves recorded for plotting
            'train_sp0': [],
            'train_se0': [],
            'train_sp1': [],
            'train_se1': [],
            'train_sp2': [],
            'train_se2': [],
            'val_sp0': [],
            'val_se0': [],
            'val_sp1': [],
            'val_se1': [],
            'val_sp2': [],
            'val_se2': [],
            'train_mAUC': [],
            'val_mAUC': [],
            'train_mAP': [],
            'val_mAP': []
        }
    else:
        raise ValueError(f'unsupported num_classes: {config.num_classes}')

    # ================================================== Training ===============================================
    iteration = 0
    # ****************************************** train ****************************************
    train_iter = iter(train_dataloader)
    model.train()
    while iteration < config.max_iter:
        try:
            image, label, image_path = next(train_iter)
        except StopIteration:  # dataloader exhausted; restart (and reshuffle)
            train_iter = iter(train_dataloader)
            image, label, image_path = next(train_iter)

        iteration += 1

        # ------------------------------------ prepare input ------------------------------------
        if config.use_gpu:
            image = image.cuda()
            label = label.cuda()

        # ---------------------------------- go through the model --------------------------------
        score = model(image)

        # ----------------------------------- backpropagate -------------------------------------
        optimizer.zero_grad()
        loss = criterion(score, label)
        # loss = criterion(log_softmax(score, dim=1), label)  # LabelSmoothing
        loss.backward()
        optimizer.step()

        # ------------------------------------ record loss ------------------------------------
        loss_meter.add(loss.item())

        if iteration % config.print_freq == 0:
            tqdm.write(
                f"iter: [{iteration}/{config.max_iter}] {config.save_model_name[:-4]} =================================="
            )

            # *************************************** validate ***************************************
            if config.num_classes == 2:  # binary classification
                model.eval()
                train_cm, train_AUC, train_sp, train_se, train_T, train_accuracy = val_2class(
                    model, train_dataloader, train_dist)
                val_cm, val_AUC, val_sp, val_se, val_T, val_accuracy = val_2class(
                    model, val_dataloader, val_dist)
                # vis.plot('loss', loss_meter.value()[0])

                model.train()

                # ------------------------------------ save model ------------------------------------
                if val_AUC > previous_AUC:  # save the model when validation AUC improves
                    save_dir = os.path.join('checkpoints', save_model_dir,
                                            save_model_name[:-4])
                    os.makedirs(save_dir, exist_ok=True)
                    if config.parallel:  # unwrap DataParallel before saving
                        model.module.save(os.path.join(save_dir, save_model_name))
                    else:
                        model.save(os.path.join(save_dir, save_model_name))
                    previous_AUC = val_AUC
                    save_iter = iteration

                # ---------------------------------- record and print ---------------------------------
                process_record['loss'].append(loss_meter.value()[0])
                process_record['train_avg'].append((train_sp + train_se) / 2)
                process_record['train_sp'].append(train_sp)
                process_record['train_se'].append(train_se)
                process_record['train_AUC'].append(train_AUC)
                process_record['val_avg'].append((val_sp + val_se) / 2)
                process_record['val_sp'].append(val_sp)
                process_record['val_se'].append(val_se)
                process_record['val_AUC'].append(val_AUC)

                # vis.plot_many({'loss': loss_meter.value()[0],
                #                'train_avg': (train_sp + train_se) / 2, 'train_sp': train_sp, 'train_se': train_se,
                #                'val_avg': (val_sp + val_se) / 2, 'val_sp': val_sp, 'val_se': val_se,
                #                'train_AUC': train_AUC, 'val_AUC': val_AUC})
                # vis.log(f"iter: [{iteration}/{config.max_iter}] =========================================")
                # vis.log(f"lr: {optimizer.param_groups[0]['lr']}, loss: {round(loss_meter.value()[0], 5)}")
                # vis.log(f"train_avg: {round((train_sp + train_se) / 2, 4)}, train_sp: {round(train_sp, 4)}, train_se: {round(train_se, 4)}")
                # vis.log(f"val_avg: {round((val_sp + val_se) / 2, 4)}, val_sp: {round(val_sp, 4)}, val_se: {round(val_se, 4)}")
                # vis.log(f'train_AUC: {train_AUC}')
                # vis.log(f'val_AUC: {val_AUC}')
                # vis.log(f'train_cm: {train_cm}')
                # vis.log(f'val_cm: {val_cm}')
                print("lr:", optimizer.param_groups[0]['lr'], "loss:",
                      round(loss_meter.value()[0], 5))
                print('train_avg:', round((train_sp + train_se) / 2, 4),
                      'train_sp:', round(train_sp, 4), 'train_se:',
                      round(train_se, 4))
                print('val_avg:', round((val_sp + val_se) / 2, 4), 'val_sp:',
                      round(val_sp, 4), 'val_se:', round(val_se, 4))
                print('train_AUC:', train_AUC, 'val_AUC:', val_AUC)
                print('train_cm:')
                print(train_cm)
                print('val_cm:')
                print(val_cm)

            elif config.num_classes == 3:  # 3-class classification
                model.eval()
                train_cm, train_mAP, train_sp, train_se, train_mAUC, train_accuracy = val_3class(
                    model, train_dataloader, train_data_scale)
                val_cm, val_mAP, val_sp, val_se, val_mAUC, val_accuracy = val_3class(
                    model, val_dataloader, val_data_scale)
                model.train()

                # ------------------------------------ save model ------------------------------------
                if val_mAP > previous_mAP:  # save the model when validation mAP improves
                    save_dir = os.path.join('checkpoints', save_model_dir,
                                            save_model_name[:-4])
                    os.makedirs(save_dir, exist_ok=True)
                    if config.parallel:  # unwrap DataParallel before saving
                        model.module.save(os.path.join(save_dir, save_model_name))
                    else:
                        model.save(os.path.join(save_dir, save_model_name))
                    previous_mAP = val_mAP
                    save_iter = iteration

                # ---------------------------------- record and print ---------------------------------
                process_record['loss'].append(loss_meter.value()[0])
                process_record['train_sp0'].append(train_sp[0])
                process_record['train_se0'].append(train_se[0])
                process_record['train_sp1'].append(train_sp[1])
                process_record['train_se1'].append(train_se[1])
                process_record['train_sp2'].append(train_sp[2])
                process_record['train_se2'].append(train_se[2])
                process_record['train_mAUC'].append(float(train_mAUC))
                process_record['train_mAP'].append(float(train_mAP))
                process_record['val_sp0'].append(val_sp[0])
                process_record['val_se0'].append(val_se[0])
                process_record['val_sp1'].append(val_sp[1])
                process_record['val_se1'].append(val_se[1])
                process_record['val_sp2'].append(val_sp[2])
                process_record['val_se2'].append(val_se[2])
                process_record['val_mAUC'].append(float(val_mAUC))
                process_record['val_mAP'].append(float(val_mAP))

                # vis.plot_many({'loss': loss_meter.value()[0],
                #                'train_sp0': train_se[0], 'train_sp1': train_se[1], 'train_sp2': train_se[2],
                #                'train_se0': train_se[0], 'train_se1': train_se[1], 'train_se2': train_se[2],
                #                'val_sp0': val_se[0], 'val_sp1': val_se[1], 'val_sp2': val_se[2],
                #                'val_se0': val_se[0], 'val_se1': val_se[1], 'val_se2': val_se[2],
                #                'train_mAP': train_mAP, 'val_mAP': val_mAP})
                # vis.log(f"iter: [{iteration}/{config.max_iter}] =========================================")
                # vis.log(f"lr: {optimizer.param_groups[0]['lr']}, loss: {round(loss_meter.value()[0], 5)}")
                # vis.log(f"train_sp0: {round(train_sp[0], 4)}, train_sp1: {round(train_sp[1], 4)}, train_sp2: {round(train_sp[2], 4)}")
                # vis.log(f"train_se0: {round(train_se[0], 4)}, train_se1: {round(train_se[1], 4)}, train_se2: {round(train_se[2], 4)}")
                # vis.log(f"val_sp0: {round(val_sp[0], 4)}, val_sp1: {round(val_sp[1], 4)}, val_sp2: {round(val_sp[2], 4)}")
                # vis.log(f"val_se0: {round(val_se[0], 4)}, val_se1: {round(val_se[1], 4)}, val_se2: {round(val_se[2], 4)}")
                # vis.log(f"train_mAP: {train_mAP}, val_mAP: {val_mAP}")
                # vis.log(f'train_cm: {train_cm}')
                # vis.log(f'val_cm: {val_cm}')
                print("lr:", optimizer.param_groups[0]['lr'], "loss:",
                      round(loss_meter.value()[0], 5))
                print('train_sp0:', round(train_sp[0], 4), 'train_sp1:',
                      round(train_sp[1], 4), 'train_sp2:',
                      round(train_sp[2], 4))
                print('train_se0:', round(train_se[0], 4), 'train_se1:',
                      round(train_se[1], 4), 'train_se2:',
                      round(train_se[2], 4))
                print('val_sp0:', round(val_sp[0], 4), 'val_sp1:',
                      round(val_sp[1], 4), 'val_sp2:', round(val_sp[2], 4))
                print('val_se0:', round(val_se[0], 4), 'val_se1:',
                      round(val_se[1], 4), 'val_se2:', round(val_se[2], 4))
                print('mSP:', round(sum(val_sp) / 3, 5), 'mSE:',
                      round(sum(val_se) / 3, 5))
                print('train_mAUC:', train_mAUC, 'val_mAUC:', val_mAUC)
                print('train_mAP:', train_mAP, 'val_mAP:', val_mAP)
                print('train_cm:')
                print(train_cm)
                print('val_cm:')
                print(val_cm)
                print('Best mAP:', previous_mAP)

            loss_meter.reset()

        # ------------------------------------ save record ------------------------------------
        if os.path.exists(
                os.path.join('checkpoints', save_model_dir,
                             save_model_name.split('.')[0])):
            write_json(file=os.path.join('checkpoints', save_model_dir,
                                         save_model_name.split('.')[0],
                                         'process_record.json'),
                       content=process_record)

    # vis.log(f"Best Iter: {save_iter}")
    print("Best Iter:", save_iter)
Example #4
def test(**kwargs):
    config.parse(kwargs)

    # prepare data
    test_data = PairSWDataset(config.test_paths, phase='test', useRGB=config.useRGB, usetrans=config.usetrans, balance=False)
    test_dataloader = DataLoader(test_data, batch_size=config.batch_size, shuffle=False, num_workers=config.num_workers)
    print('Test Images:', len(test_data))

    # prepare model
    model = SiameseNet(num_classes=config.num_classes)
    print(model)

    if config.load_model_path:
        model.load(config.load_model_path)
        print('Model has been loaded!')
    else:
        print("Don't load model")
    if config.use_gpu:
        model.cuda()
    if config.parallel:
        model = torch.nn.DataParallel(model, device_ids=list(range(config.num_of_gpu)))
    model.eval()

    test_cm = meter.ConfusionMeter(config.num_classes)
    softmax = functional.softmax
    results = []

    # go through the model
    for i, (image, label, image_path) in tqdm(enumerate(test_dataloader)):
        img, target = image, label
        if config.use_gpu:
            img = img.cuda()
            target = target.cuda()

        # Variable(volatile=True) is deprecated; disable autograd explicitly instead
        with torch.no_grad():
            score = model(img)

        test_cm.add(softmax(score, dim=1).data, target.data)

        for l, p, ip in zip(label, softmax(score, dim=1).data, image_path):
            if p[0] >= p[1]:
                results.append((ip, l, 0, round(p[0].item(), 4), round(p[1].item(), 4)))
            else:
                results.append((ip, l, 1, round(p[0].item(), 4), round(p[1].item(), 4)))

        # for p, ip in zip(softmax(score, dim=1).data, image_path):
        #     # print(p)
        #     b = ip.split('/')[-1].split('.')[0].split('_')[2:6]
        #     if p[1] >= 0.5:
        #         if ip.split('/')[-2] in positive_bbox.keys():
        #             positive_bbox[ip.split('/')[-2]].append((int(b[0]), int(b[1]), int(b[2]), int(b[3]), p[1]))
        #         else:
        #             positive_bbox[ip.split('/')[-2]] = [(int(b[0]), int(b[1]), int(b[2]), int(b[3]), p[1])]
        #     else:
        #         pass

    ACC = 100. * sum([test_cm.value()[c][c] for c in range(config.num_classes)]) / test_cm.value().sum()
    SE = 100. * test_cm.value()[1][1] / (test_cm.value()[1][0] + test_cm.value()[1][1])

    print('confusion matrix:')
    print(test_cm.value())
    print('test accuracy:', ACC)
    print('Sensitivity:', SE)

    if config.result_file:
        write_csv(os.path.join('results', config.result_file), tag=['path', 'label', 'predict', 'p1', 'p2'], content=results)
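
`write_csv` is a project utility not shown on this page; a minimal sketch matching the call above (`tag` as the header row, `content` as the iterable of result tuples):

import csv

def write_csv(file, tag, content):
    with open(file, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(tag)       # header row
        writer.writerows(content)  # one row per prediction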
Example #5
def train_pair(**kwargs):
    config.parse(kwargs)
    vis = Visualizer(port=2333, env=config.env)
    vis.log('Use config:')
    for k, v in config.__class__.__dict__.items():
        if not k.startswith('__'):
            vis.log(f"{k}: {getattr(config, k)}")

    # prepare data
    train_data = PairSWDataset(config.train_paths, phase='train', useRGB=config.useRGB, usetrans=config.usetrans, balance=config.data_balance)
    valpair_data = PairSWDataset(config.test_paths, phase='val_pair', useRGB=config.useRGB, usetrans=config.usetrans, balance=False)
    print('Training Samples:', len(train_data), 'ValPair Samples:', len(valpair_data))
    dist = train_data.dist()
    print('Train Data Distribution:', dist)

    train_dataloader = DataLoader(train_data, batch_size=config.batch_size, shuffle=True, num_workers=config.num_workers)
    valpair_dataloader = DataLoader(valpair_data, batch_size=config.batch_size, shuffle=False, num_workers=config.num_workers)

    # prepare model
    model = SiameseNet(num_classes=config.num_classes)
    print(model)

    if config.load_model_path:
        model.load(config.load_model_path)
    if config.use_gpu:
        model.cuda()
    if config.parallel:
        model = torch.nn.DataParallel(model, device_ids=list(range(config.num_of_gpu)))

    model.train()

    # criterion and optimizer
    weight_pair = torch.Tensor([1, 1.5])
    vis.log(f'pair loss weight: {weight_pair}')
    print('pair loss weight:', weight_pair)
    weight_pair = weight_pair.cuda()
    pair_criterion = torch.nn.CrossEntropyLoss(weight=weight_pair)

    lr = config.lr
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=config.weight_decay)

    # metric
    softmax = functional.softmax
    pair_loss_meter = meter.AverageValueMeter()
    pair_epoch_loss = meter.AverageValueMeter()

    pair_train_cm = meter.ConfusionMeter(config.num_classes)
    # previous_loss = 100
    pair_previous_avg_se = 0

    # train
    if config.parallel:
        if not os.path.exists(os.path.join('checkpoints', model.module.model_name)):
            os.mkdir(os.path.join('checkpoints', model.module.model_name))
    else:
        if not os.path.exists(os.path.join('checkpoints', model.model_name)):
            os.mkdir(os.path.join('checkpoints', model.model_name))

    for epoch in range(config.max_epoch):
        print(f"epoch: [{epoch+1}/{config.max_epoch}] =============================================")
        pair_train_cm.reset()
        pair_epoch_loss.reset()

        # train
        for i, (image_1, image_2, label_1, label_2, label_res, _, _) in tqdm(enumerate(train_dataloader)):
            pair_loss_meter.reset()

            # prepare input (Variable wrappers are unnecessary in modern PyTorch)
            target_res = label_res

            if config.use_gpu:
                image_1 = image_1.cuda()
                image_2 = image_2.cuda()
                target_res = target_res.cuda()

            # go through the model
            score_1, score_2, score_res = model(image_1, image_2)

            # backpropagate
            optimizer.zero_grad()
            pair_loss = pair_criterion(score_res, target_res)
            pair_loss.backward()
            optimizer.step()

            pair_loss_meter.add(pair_loss.item())
            pair_epoch_loss.add(pair_loss.item())

            pair_train_cm.add(softmax(score_res, dim=1).data, target_res.data)

            if (i+1) % config.print_freq == 0:
                vis.plot('loss', pair_loss_meter.value()[0])

        # print result
        pair_train_se = [100. * pair_train_cm.value()[0][0] / (pair_train_cm.value()[0][0] + pair_train_cm.value()[0][1]),
                         100. * pair_train_cm.value()[1][1] / (pair_train_cm.value()[1][0] + pair_train_cm.value()[1][1])]
        model.eval()
        pair_val_cm, pair_val_accuracy, pair_val_se = val_pair(model, valpair_dataloader)

        if np.average(pair_val_se) > pair_previous_avg_se:  # save the model when validation average sensitivity improves
            if config.parallel:
                save_model_dir = config.save_model_dir if config.save_model_dir else model.module.model_name
                save_model_name = config.save_model_name if config.save_model_name else model.module.model_name + '_best_model.pth'
                if not os.path.exists(os.path.join('checkpoints', save_model_dir)):
                    os.makedirs(os.path.join('checkpoints', save_model_dir))
                model.module.save(os.path.join('checkpoints', save_model_dir, save_model_name))
            else:
                save_model_dir = config.save_model_dir if config.save_model_dir else model.model_name
                save_model_name = config.save_model_name if config.save_model_name else model.model_name + '_best_model.pth'
                if not os.path.exists(os.path.join('checkpoints', save_model_dir)):
                    os.makedirs(os.path.join('checkpoints', save_model_dir))
                model.save(os.path.join('checkpoints', save_model_dir, save_model_name))
            pair_previous_avg_se = np.average(pair_val_se)

        if epoch+1 == config.max_epoch:  # save the final-epoch model as well
            if config.parallel:
                save_model_dir = config.save_model_dir if config.save_model_dir else model.module.model_name
                save_model_name = config.save_model_name.split('.pth')[0]+'_last.pth' if config.save_model_name else model.module.model_name + '_last_model.pth'
            else:
                save_model_dir = config.save_model_dir if config.save_model_dir else model.model_name
                save_model_name = config.save_model_name.split('.pth')[0]+'_last.pth' if config.save_model_name else model.model_name + '_last_model.pth'
            if not os.path.exists(os.path.join('checkpoints', save_model_dir)):
                os.makedirs(os.path.join('checkpoints', save_model_dir))
            if config.parallel:  # a DataParallel wrapper has no .save(); go through .module
                model.module.save(os.path.join('checkpoints', save_model_dir, save_model_name))
            else:
                model.save(os.path.join('checkpoints', save_model_dir, save_model_name))

        vis.plot_many({'epoch_loss': pair_epoch_loss.value()[0],
                       'pair_train_avg_se': np.average(pair_train_se), 'pair_train_se_0': pair_train_se[0], 'pair_train_se_1': pair_train_se[1],
                       'pair_val_avg_se': np.average(pair_val_se), 'pair_val_se_0': pair_val_se[0], 'pair_val_se_1': pair_val_se[1]})
        vis.log(f"epoch: [{epoch+1}/{config.max_epoch}] ===============================================")
        vis.log(f"lr: {lr}, loss: {round(pair_epoch_loss.value()[0], 5)}")
        vis.log(f"pair_train_avg_se: {round(np.average(pair_train_se), 4)}, pair_train_se_0: {round(pair_train_se[0], 4)}, pair_train_se_1: {round(pair_train_se[1], 4)}")
        vis.log(f"pair_val_avg_se: {round(sum(pair_val_se) / len(pair_val_se), 4)}, pair_val_se_0: {round(pair_val_se[0], 4)}, pair_val_se_1: {round(pair_val_se[1], 4)}")
        vis.log(f'pair_train_cm: {pair_train_cm.value()}')
        vis.log(f'pair_val_cm: {pair_val_cm.value()}')
        print("lr:", lr, "loss:", round(pair_epoch_loss.value()[0], 5))
        print('pair_train_avg_se:', round(np.average(pair_train_se), 4), 'pair_train_se_0:', round(pair_train_se[0], 4), 'pair_train_se_1:', round(pair_train_se[1], 4))
        print('pair_val_avg_se:', round(np.average(pair_val_se), 4), 'pair_val_se_0:', round(pair_val_se[0], 4), 'pair_val_se_1:', round(pair_val_se[1], 4))
        print('pair_train_cm:')
        print(pair_train_cm.value())
        print('pair_val_cm:')
        print(pair_val_cm.value())

        # update learning rate
        # if loss_meter.value()[0] > previous_loss:
        #     lr = lr * config.lr_decay
        #     for param_group in optimizer.param_groups:
        #         param_group['lr'] = lr
        # previous_loss = loss_meter.value()[0]
        if (epoch+1) % 5 == 0:
            lr = lr * config.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
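
For reference, the manual decay above (multiply `lr` by `config.lr_decay` every 5 epochs) matches PyTorch's built-in step scheduler; a sketch of the substitution:

from torch.optim.lr_scheduler import StepLR

scheduler = StepLR(optimizer, step_size=5, gamma=config.lr_decay)
# then call scheduler.step() once per epoch instead of editing
# optimizer.param_groups by hand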
Example #6
def train(**kwargs):
    config.parse(kwargs)

    # ============================================ Visualization =============================================
    vis = Visualizer(port=2333, env=config.env)
    vis.log('Use config:')
    for k, v in config.__class__.__dict__.items():
        if not k.startswith('__'):
            vis.log(f"{k}: {getattr(config, k)}")

    # ============================================= Prepare Data =============================================
    train_data_1 = SlideWindowDataset(config.train_paths, phase='train', useRGB=config.useRGB, usetrans=config.usetrans, balance=config.data_balance)
    train_data_2 = SlideWindowDataset(config.train_paths, phase='train', useRGB=config.useRGB, usetrans=config.usetrans, balance=config.data_balance)
    val_data = SlideWindowDataset(config.test_paths, phase='val', useRGB=config.useRGB, usetrans=config.usetrans, balance=False)
    print('Training Images:', len(train_data_1), 'Validation Images:', len(val_data))
    dist = train_data_1.dist()
    print('Train Data Distribution:', dist)

    train_dataloader_1 = DataLoader(train_data_1, batch_size=config.batch_size, shuffle=True, num_workers=config.num_workers)
    train_dataloader_2 = DataLoader(train_data_2, batch_size=config.batch_size, shuffle=True, num_workers=config.num_workers)
    val_dataloader = DataLoader(val_data, batch_size=config.batch_size, shuffle=False, num_workers=config.num_workers)

    # ============================================= Prepare Model ============================================
    # model = PCResNet18(num_classes=config.num_classes)
    model = DualResNet18(num_classes=config.num_classes)
    print(model)

    if config.load_model_path:
        model.load(config.load_model_path)
    if config.use_gpu:
        model.cuda()
    if config.parallel:
        model = torch.nn.DataParallel(model, device_ids=list(range(config.num_of_gpu)))

    # =========================================== Criterion and Optimizer =====================================
    # weight = torch.Tensor([1, 1])
    # weight = torch.Tensor([dist['1']/(dist['0']+dist['1']), dist['0']/(dist['0']+dist['1'])])  # the class frequencies are deliberately swapped; beyond two classes, use reciprocals instead
    # weight = torch.Tensor([1, 3.5])
    # weight = torch.Tensor([1, 5])
    weight = torch.Tensor([1, 7])
    vis.log(f'loss weight: {weight}')
    print('loss weight:', weight)
    weight = weight.cuda()

    criterion = torch.nn.CrossEntropyLoss(weight=weight)
    MSELoss = torch.nn.MSELoss()
    sycriterion = torch.nn.CrossEntropyLoss()

    lr = config.lr
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=config.weight_decay)

    # ================================================== Metrics ===============================================
    softmax = functional.softmax
    loss_meter = meter.AverageValueMeter()
    epoch_loss = meter.AverageValueMeter()
    mse_meter = meter.AverageValueMeter()
    epoch_mse = meter.AverageValueMeter()
    syloss_meter = meter.AverageValueMeter()
    epoch_syloss = meter.AverageValueMeter()
    total_loss_meter = meter.AverageValueMeter()
    epoch_total_loss = meter.AverageValueMeter()
    train_cm = meter.ConfusionMeter(config.num_classes)

    # ====================================== Saving and Recording Configuration =================================
    previous_auc = 0
    if config.parallel:
        save_model_dir = config.save_model_dir if config.save_model_dir else model.module.model_name
        save_model_name = config.save_model_name if config.save_model_name else model.module.model_name + '_best_model.pth'
    else:
        save_model_dir = config.save_model_dir if config.save_model_dir else model.model_name
        save_model_name = config.save_model_name if config.save_model_name else model.model_name + '_best_model.pth'
    save_epoch = 1  # records the epoch whose model performed best on the validation set
    process_record = {'epoch_loss': [],
                      'train_avg_se': [], 'train_se_0': [], 'train_se_1': [],
                      'val_avg_se': [], 'val_se_0': [], 'val_se_1': [],
                      'AUC': []}  # training curves recorded for plotting

    # ================================================== Training ===============================================
    for epoch in range(config.max_epoch):
        print(f"epoch: [{epoch+1}/{config.max_epoch}] {config.save_model_name[:-4]} ==================================")
        train_cm.reset()
        epoch_loss.reset()
        epoch_mse.reset()
        epoch_syloss.reset()
        epoch_total_loss.reset()

        # ****************************************** train ****************************************
        model.train()
        for i, (item1, item2) in tqdm(enumerate(zip(train_dataloader_1, train_dataloader_2))):
            loss_meter.reset()
            mse_meter.reset()
            syloss_meter.reset()
            total_loss_meter.reset()

            # ------------------------------------ prepare input ------------------------------------
            image1, label1, image_path1 = item1
            image2, label2, image_path2 = item2
            if config.use_gpu:
                image1 = image1.cuda()
                image2 = image2.cuda()
                label1 = label1.cuda()
                label2 = label2.cuda()

            # ---------------------------------- go through the model --------------------------------
            # score1, score2, logits1, logits2 = model(image1, image2)  # Pairwise Confusion Network
            score1, score2, score3 = model(image1, image2)  # Dual CNN

            # ----------------------------------- backpropagate -------------------------------------
            # add an L2 (MSE) penalty between the two branches' features
            # optimizer.zero_grad()
            # cls_loss1 = criterion(score1, label1)
            # cls_loss2 = criterion(score2, label2)
            #
            # ch_weight = torch.where(label1 == label2, torch.Tensor([0]).cuda(), torch.Tensor([1]).cuda())
            # ch_weight = ch_weight.view(logits1.size(0), -1)
            # mse = MSELoss(logits1 * ch_weight, logits2 * ch_weight)  # only penalize pairs from different classes; same-class pairs are zeroed out
            #
            # total_loss = cls_loss1 + cls_loss2 + 10 * mse
            # total_loss.backward()
            # optimizer.step()

            # add a loss on the paired logits predicting whether the two inputs belong to the same class
            optimizer.zero_grad()
            cls_loss1 = criterion(score1, label1)
            cls_loss2 = criterion(score2, label2)

            sylabel = torch.where(label1 == label2, torch.Tensor([0]).cuda(),
                                  torch.Tensor([1]).cuda()).long()  # 0 = same class, 1 = different
            sy_loss = sycriterion(score3, sylabel)

            total_loss = cls_loss1 + cls_loss2 + 2 * sy_loss
            total_loss.backward()
            optimizer.step()

            # ------------------------------------ record loss ------------------------------------
            loss_meter.add((cls_loss1 + cls_loss2).item())
            # mse_meter.add(mse.item())
            # syloss_meter.add(sy_loss.item())
            # total_loss_meter.add(total_loss.item())

            epoch_loss.add((cls_loss1 + cls_loss2).item())
            # epoch_mse.add(mse.item())
            epoch_syloss.add(sy_loss.item())
            epoch_total_loss.add(total_loss.item())

            train_cm.add(softmax(score1, dim=1).detach(), label1.detach())

            if (i+1) % config.print_freq == 0:
                vis.plot('loss', loss_meter.value()[0])

        train_se = [100. * train_cm.value()[0][0] / (train_cm.value()[0][0] + train_cm.value()[0][1]),
                    100. * train_cm.value()[1][1] / (train_cm.value()[1][0] + train_cm.value()[1][1])]

        # *************************************** validate ***************************************
        model.eval()
        if (epoch + 1) % 1 == 0:
            Best_T, val_cm, val_spse, val_accuracy, AUC = val(model, val_dataloader)

            # ------------------------------------ save model ------------------------------------
            if AUC > previous_auc and epoch + 1 > 5:
                save_dir = os.path.join('checkpoints', save_model_dir, save_model_name.split('.')[0])
                os.makedirs(save_dir, exist_ok=True)
                if config.parallel:  # unwrap DataParallel before saving
                    model.module.save(os.path.join(save_dir, save_model_name))
                else:
                    model.save(os.path.join(save_dir, save_model_name))
                previous_auc = AUC
                save_epoch = epoch + 1

            # ---------------------------------- record and print ---------------------------------
            process_record['epoch_loss'].append(epoch_loss.value()[0])
            process_record['train_avg_se'].append(np.average(train_se))
            process_record['train_se_0'].append(train_se[0])
            process_record['train_se_1'].append(train_se[1])
            process_record['val_avg_se'].append(np.average(val_spse))
            process_record['val_se_0'].append(val_spse[0])
            process_record['val_se_1'].append(val_spse[1])
            process_record['AUC'].append(AUC)

            # vis.plot('epoch_mse', epoch_mse.value()[0])
            vis.plot('epoch_syloss', epoch_syloss.value()[0])
            vis.plot_many({'epoch_loss': epoch_loss.value()[0], 'epoch_total_loss': epoch_total_loss.value()[0],
                           'train_avg_se': np.average(train_se), 'train_se_0': train_se[0], 'train_se_1': train_se[1],
                           'val_avg_se': np.average(val_spse), 'val_se_0': val_spse[0], 'val_se_1': val_spse[1],
                           'AUC': AUC})
            vis.log(f"epoch: [{epoch+1}/{config.max_epoch}] =========================================")
            vis.log(f"lr: {optimizer.param_groups[0]['lr']}, loss: {round(loss_meter.value()[0], 5)}")
            vis.log(f"train_avg_se: {round(np.average(train_se), 4)}, train_se_0: {round(train_se[0], 4)}, train_se_1: {round(train_se[1], 4)}")
            vis.log(f"val_avg_se: {round(sum(val_spse)/len(val_spse), 4)}, val_se_0: {round(val_spse[0], 4)}, val_se_1: {round(val_spse[1], 4)}")
            vis.log(f"AUC: {AUC}")
            vis.log(f'train_cm: {train_cm.value()}')
            vis.log(f'Best Threshold: {Best_T}')
            vis.log(f'val_cm: {val_cm}')
            print("lr:", optimizer.param_groups[0]['lr'], "loss:", round(epoch_loss.value()[0], 5))
            print('train_avg_se:', round(np.average(train_se), 4), 'train_se_0:', round(train_se[0], 4), 'train_se_1:', round(train_se[1], 4))
            print('val_avg_se:', round(np.average(val_spse), 4), 'val_se_0:', round(val_spse[0], 4), 'val_se_1:', round(val_spse[1], 4))
            print('AUC:', AUC)
            print('train_cm:')
            print(train_cm.value())
            print('Best Threshold:', Best_T, 'val_cm:')
            print(val_cm)

            # ------------------------------------ save record ------------------------------------
            if os.path.exists(os.path.join('checkpoints', save_model_dir, save_model_name.split('.')[0])):
                write_json(file=os.path.join('checkpoints', save_model_dir, save_model_name.split('.')[0], 'process_record.json'),
                           content=process_record)

        # if (epoch+1) % 5 == 0:
        #     lr = lr * config.lr_decay
        #     for param_group in optimizer.param_groups:
        #         param_group['lr'] = lr

    vis.log(f"Best Epoch: {save_epoch}")
    print("Best Epoch:", save_epoch)
def train(**kwargs):
    config.parse(kwargs)

    # ============================================ Visualization =============================================
    vis = Visualizer(port=2333, env=config.env)
    vis.log('Use config:')
    for k, v in config.__class__.__dict__.items():
        if not k.startswith('__'):
            vis.log(f"{k}: {getattr(config, k)}")

    # ============================================= Prepare Data =============================================
    train_data = SlideWindowDataset(config.train_paths,
                                    phase='train',
                                    useRGB=config.useRGB,
                                    usetrans=config.usetrans,
                                    balance=config.data_balance)
    val_data = SlideWindowDataset(config.test_paths,
                                  phase='val',
                                  useRGB=config.useRGB,
                                  usetrans=config.usetrans,
                                  balance=False)
    print('Training Images:', len(train_data),
          'Validation Images:', len(val_data))
    dist = train_data.dist()
    print('Train Data Distribution:', dist)

    train_dataloader = DataLoader(train_data,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=config.num_workers)
    val_dataloader = DataLoader(val_data,
                                batch_size=config.batch_size,
                                shuffle=False,
                                num_workers=config.num_workers)

    # ============================================= Prepare Model ============================================
    model = UNet_Classifier(num_classes=config.num_classes)
    print(model)

    if config.load_model_path:
        model.load(config.load_model_path)
        print('Model loaded')
    if config.use_gpu:
        model.cuda()
    if config.parallel:
        model = torch.nn.DataParallel(
            model, device_ids=list(range(config.num_of_gpu)))

    # =========================================== Criterion and Optimizer =====================================
    # weight = torch.Tensor([1, 1])
    # weight = torch.Tensor([dist['1']/(dist['0']+dist['1']), dist['0']/(dist['0']+dist['1'])])  # the two class weights must be swapped relative to the class frequencies; with more than two classes, use the reciprocals instead
    # weight = torch.Tensor([1, 3.5])
    # weight = torch.Tensor([1, 5])
    weight = torch.Tensor([1, 7])
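    # note added for clarity: hand-tuned class weights for CrossEntropyLoss;
    # class 1, presumably the rarer one given the commented alternatives above,
    # is up-weighted 7x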

    vis.log(f'loss weight: {weight}')
    print('loss weight:', weight)
    weight = weight.cuda()
    criterion = torch.nn.CrossEntropyLoss(weight=weight)
    lr = config.lr
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=config.weight_decay)

    # ================================================== Metrics ===============================================
    softmax = functional.softmax
    loss_meter_edge = meter.AverageValueMeter()
    epoch_loss_edge = meter.AverageValueMeter()
    loss_meter_cls = meter.AverageValueMeter()
    epoch_loss_cls = meter.AverageValueMeter()
    loss_meter = meter.AverageValueMeter()
    epoch_loss = meter.AverageValueMeter()
    train_cm = meter.ConfusionMeter(config.num_classes)
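    # note: loss_meter is reset every batch below (so it reports the most recent
    # batch), while the epoch_* meters accumulate over one epoch and are reset
    # at the start of the next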

    # ====================================== Saving and Recording Configuration =================================
    previous_auc = 0
    if config.parallel:
        save_model_dir = config.save_model_dir if config.save_model_dir else model.module.model_name
        save_model_name = config.save_model_name if config.save_model_name else model.module.model_name + '_best_model.pth'
    else:
        save_model_dir = config.save_model_dir if config.save_model_dir else model.model_name
        save_model_name = config.save_model_name if config.save_model_name else model.model_name + '_best_model.pth'
    save_epoch = 1  # records the epoch of the model that performed best on the validation set
    process_record = {
        'epoch_loss': [],
        'epoch_loss_edge': [],
        'epoch_loss_cls': [],
        'train_avg_se': [],
        'train_se_0': [],
        'train_se_1': [],
        'val_avg_se': [],
        'val_se_0': [],
        'val_se_1': [],
        'AUC': [],
        'DICE': []
    }  # records the training curves so they can be plotted later

    # ================================================== Training ===============================================
    for epoch in range(config.max_epoch):
        print(
            f"epoch: [{epoch + 1}/{config.max_epoch}] {save_model_name[:-4]} =================================="
        )
        train_cm.reset()
        epoch_loss.reset()
        epoch_loss_edge.reset()
        epoch_loss_cls.reset()
        dice = []

        # ****************************************** train ****************************************
        model.train()
        for i, (image, label, edge_mask,
                image_path) in tqdm(enumerate(train_dataloader)):
            loss_meter.reset()

            # ------------------------------------ prepare input ------------------------------------
            if config.use_gpu:
                image = image.cuda()
                label = label.cuda()
                edge_mask = edge_mask.cuda()

            # ---------------------------------- go through the model --------------------------------
            score, score_mask = model(x=image)

            # ----------------------------------- backpropagate -------------------------------------
            optimizer.zero_grad()

            # classification loss
            loss_cls = criterion(score, label)
            # loss on pixels inside the edge mask (masked mean of -log sigmoid)
            log_prob_mask = functional.logsigmoid(score_mask)
            count_edge = torch.sum(edge_mask, dim=(1, 2, 3), keepdim=True)
            loss_edge = -1 * torch.mean(
                torch.sum(
                    edge_mask * log_prob_mask, dim=(1, 2, 3), keepdim=True) /
                (count_edge + 1e-8))

            # loss on pixels outside the edge mask
            r_prob_mask = 1.0 - torch.sigmoid(score_mask)
            r_edge_mask = 1.0 - edge_mask
            log_rprob_mask = torch.log(r_prob_mask + 1e-5)
            count_redge = torch.sum(r_edge_mask, dim=(1, 2, 3), keepdim=True)
            loss_redge = -1 * torch.mean(
                torch.sum(r_edge_mask * log_rprob_mask,
                          dim=(1, 2, 3),
                          keepdim=True) / (count_redge + 1e-8))

            # weight the two edge terms by the foreground/background pixel counts
            w1 = torch.sum(count_edge).item() / (torch.sum(count_edge).item() +
                                                 torch.sum(count_redge).item())
            w2 = torch.sum(count_redge).item() / (
                torch.sum(count_edge).item() + torch.sum(count_redge).item())
            loss = loss_cls + w1 * loss_edge + w2 * loss_redge
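            # net effect (note added for clarity): the two masked terms form a
            # pixel-count-weighted binary cross-entropy over the predicted edge
            # map, which is added to the classification loss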

            loss.backward()
            optimizer.step()

            # ------------------------------------ record loss ------------------------------------
            loss_meter_edge.add((w1 * loss_edge + w2 * loss_redge).item())
            epoch_loss_edge.add((w1 * loss_edge + w2 * loss_redge).item())
            loss_meter_cls.add(loss_cls.item())
            epoch_loss_cls.add(loss_cls.item())
            loss_meter.add(loss.item())
            epoch_loss.add(loss.item())
            train_cm.add(softmax(score, dim=1).detach(), label.detach())
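            # note: the 0.5 threshold below is applied to raw logits, which
            # corresponds to sigmoid(score_mask) > ~0.62 rather than > 0.5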
            dice.append(
                dice_coeff(input=(score_mask > 0.5).float(),
                           target=edge_mask[:, 0, :, :]).item())

            if (i + 1) % config.print_freq == 0:
                vis.plot_many({
                    'loss': loss_meter.value()[0],
                    'loss_edge': loss_meter_edge.value()[0],
                    'loss_cls': loss_meter_cls.value()[0]
                })

        train_se = [
            100. * train_cm.value()[0][0] /
            (train_cm.value()[0][0] + train_cm.value()[0][1]),
            100. * train_cm.value()[1][1] /
            (train_cm.value()[1][0] + train_cm.value()[1][1])
        ]
        train_dice = sum(dice) / len(dice)

        # *************************************** validate ***************************************
        model.eval()
        if (epoch + 1) % 1 == 0:  # validate every epoch; the modulus makes the interval easy to change
            Best_T, val_cm, val_spse, val_accuracy, AUC, val_dice = val(
                model, val_dataloader)

            # ------------------------------------ save model ------------------------------------
            if AUC > previous_auc and epoch + 1 > 5:  # after epoch 5, save the model whenever the validation AUC improves
                if config.parallel:
                    if not os.path.exists(
                            os.path.join('checkpoints', save_model_dir,
                                         save_model_name[:-4])):
                        os.makedirs(
                            os.path.join('checkpoints', save_model_dir,
                                         save_model_name[:-4]))
                    model.module.save(
                        os.path.join('checkpoints', save_model_dir,
                                     save_model_name[:-4], save_model_name))
                else:
                    if not os.path.exists(
                            os.path.join('checkpoints', save_model_dir,
                                         save_model_name[:-4])):
                        os.makedirs(
                            os.path.join('checkpoints', save_model_dir,
                                         save_model_name[:-4]))
                    model.save(
                        os.path.join('checkpoints', save_model_dir,
                                     save_model_name[:-4], save_model_name))
                previous_auc = AUC
                save_epoch = epoch + 1

            # ---------------------------------- record and print ---------------------------------
            process_record['epoch_loss'].append(epoch_loss.value()[0])
            process_record['epoch_loss_edge'].append(
                epoch_loss_edge.value()[0])
            process_record['epoch_loss_cls'].append(epoch_loss_cls.value()[0])
            process_record['train_avg_se'].append(np.average(train_se))
            process_record['train_se_0'].append(train_se[0])
            process_record['train_se_1'].append(train_se[1])
            process_record['val_avg_se'].append(np.average(val_spse))
            process_record['val_se_0'].append(val_spse[0])
            process_record['val_se_1'].append(val_spse[1])
            process_record['AUC'].append(AUC)
            process_record['DICE'].append(val_dice)

            vis.plot_many({
                'epoch_loss': epoch_loss.value()[0],
                'epoch_loss_edge': epoch_loss_edge.value()[0],
                'epoch_loss_cls': epoch_loss_cls.value()[0],
                'train_avg_se': np.average(train_se),
                'train_se_0': train_se[0],
                'train_se_1': train_se[1],
                'val_avg_se': np.average(val_spse),
                'val_se_0': val_spse[0],
                'val_se_1': val_spse[1],
                'AUC': AUC,
                'train_dice': train_dice,
                'val_dice': val_dice
            })
            vis.log(
                f"epoch: [{epoch + 1}/{config.max_epoch}] ==============================================="
            )
            vis.log(
                f"lr: {optimizer.param_groups[0]['lr']}, loss: {round(epoch_loss.value()[0], 5)}"
            )
            vis.log(
                f"train_avg_se: {round(np.average(train_se), 4)}, train_se_0: {round(train_se[0], 4)}, train_se_1: {round(train_se[1], 4)}"
            )
            vis.log(f"train_dice: {round(train_dice, 4)}")
            vis.log(
                f"val_avg_se: {round(np.average(val_spse), 4)}, val_se_0: {round(val_spse[0], 4)}, val_se_1: {round(val_spse[1], 4)}"
            )
            vis.log(f"val_dice: {round(val_dice, 4)}")
            vis.log(f"AUC: {AUC}")
            vis.log(f'train_cm: {train_cm.value()}')
            vis.log(f'Best Threshold: {Best_T}')
            vis.log(f'val_cm: {val_cm}')
            print("lr:", optimizer.param_groups[0]['lr'], "loss:",
                  round(epoch_loss.value()[0], 5))
            print('train_avg_se:', round(np.average(train_se), 4),
                  'train_se_0:', round(train_se[0], 4), 'train_se_1:',
                  round(train_se[1], 4))
            print('train_dice:', round(train_dice, 4))
            print('val_avg_se:', round(np.average(val_spse), 4), 'val_se_0:',
                  round(val_spse[0], 4), 'val_se_1:', round(val_spse[1], 4))
            print('val_dice:', round(val_dice, 4))
            print('AUC:', AUC)
            print('train_cm:')
            print(train_cm.value())
            print('Best Threshold:', Best_T, 'val_cm:')
            print(val_cm)

            # ------------------------------------ save record ------------------------------------
            if os.path.exists(
                    os.path.join('checkpoints', save_model_dir,
                                 save_model_name[:-4])):
                write_json(file=os.path.join('checkpoints', save_model_dir,
                                             save_model_name[:-4],
                                             'process_record.json'),
                           content=process_record)
        # if (epoch+1) % 20 == 0:
        #     lr = lr * config.lr_decay
        #     for param_group in optimizer.param_groups:
        #         param_group['lr'] = lr

    vis.log(f"Best Epoch: {save_epoch}")
    print("Best Epoch:", save_epoch)
Exemplo n.º 8
0
def iter_train(**kwargs):
    config.parse(kwargs)

    # ============================================ Visualization =============================================
    # vis = Visualizer(port=2333, env=config.env)
    # vis.log('Use config:')
    # for k, v in config.__class__.__dict__.items():
    #     if not k.startswith('__'):
    #         vis.log(f"{k}: {getattr(config, k)}")

    # ============================================= Prepare Data =============================================
    train_data = ContextVB_Dataset(config.train_paths,
                                   phase='train',
                                   num_classes=config.num_classes,
                                   useRGB=config.useRGB,
                                   usetrans=config.usetrans,
                                   padding=config.padding,
                                   balance=config.data_balance)
    val_data = ContextVB_Dataset(config.test_paths,
                                 phase='val',
                                 num_classes=config.num_classes,
                                 useRGB=config.useRGB,
                                 usetrans=False,
                                 padding=config.padding,
                                 balance=config.data_balance)
    train_dist, val_dist = train_data.dist(), val_data.dist()
    train_data_scale, val_data_scale = train_data.scale, val_data.scale
    print('Training Images:', len(train_data), 'Validation Images:',
          len(val_data))
    print('Train Data Distribution:', train_dist, 'Val Data Distribution:',
          val_dist)

    train_dataloader = DataLoader(train_data,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=config.num_workers)
    val_dataloader = DataLoader(val_data,
                                batch_size=config.batch_size,
                                shuffle=False,
                                num_workers=config.num_workers)

    # ============================================= Prepare Model ============================================
    model = ContextAlexNet(num_classes=config.num_classes)
    # model = ContextVgg16(num_classes=config.num_classes)
    # model = ContextResNet18(num_classes=config.num_classes)
    # model = ContextShareNet(num_classes=config.num_classes)
    # model = ContextResNet50(num_classes=config.num_classes)
    # print(model)

    if config.load_model_path:
        model.load(config.load_model_path)
    if config.use_gpu:
        model.cuda()
    if config.parallel:
        model = torch.nn.DataParallel(model,
                                      device_ids=list(range(
                                          config.num_of_gpu)))

    # =========================================== Criterion and Optimizer =====================================
    # criterion = torch.nn.CrossEntropyLoss(reduction='mean')
    criterion = torch.nn.CrossEntropyLoss(
        reduction='none')  # for Self-paced Learning
    # criterion = LabelSmoothing(size=config.num_classes, smoothing=0.2)
    # criterion = LabelSmoothing(size=config.num_classes, smoothing=0.2, reduction='none')  # for Self-paced Learning
    # criterion = FocalLoss(gamma=4, alpha=None)
    MSELoss = torch.nn.MSELoss()

    lr = config.lr
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=config.weight_decay)

    # ================================================== Metrics ===============================================
    log_softmax = functional.log_softmax
    loss_meter = meter.AverageValueMeter()
    mse_meter1_2 = meter.AverageValueMeter()
    mse_meter2_3 = meter.AverageValueMeter()
    total_loss_meter = meter.AverageValueMeter()

    # ====================================== Saving and Recording Configuration =================================
    previous_AUC = 0
    previous_mAP = 0
    save_iter = 1  # records the iteration of the model that performed best on the validation set
    if config.parallel:
        save_model_dir = config.save_model_dir if config.save_model_dir else model.module.model_name
        save_model_name = config.save_model_name if config.save_model_name else model.module.model_name + '_best_model.pth'
    else:
        save_model_dir = config.save_model_dir if config.save_model_dir else model.model_name
        save_model_name = config.save_model_name if config.save_model_name else model.model_name + '_best_model.pth'
    if config.num_classes == 2:  # binary classification
        process_record = {
            'loss': [],
            'mse': [],  # training curves recorded for later plotting
            'train_avg': [],
            'train_sp': [],
            'train_se': [],
            'val_avg': [],
            'val_sp': [],
            'val_se': [],
            'train_AUC': [],
            'val_AUC': []
        }
    elif config.num_classes == 3:  # 3-class classification
        process_record = {
            'loss': [],
            'mse': [],  # training curves recorded for later plotting
            'train_sp0': [],
            'train_se0': [],
            'train_sp1': [],
            'train_se1': [],
            'train_sp2': [],
            'train_se2': [],
            'val_sp0': [],
            'val_se0': [],
            'val_sp1': [],
            'val_se1': [],
            'val_sp2': [],
            'val_se2': [],
            'train_mAUC': [],
            'val_mAUC': [],
            'train_mAP': [],
            'val_mAP': []
        }
    else:
        raise ValueError(f'unsupported num_classes: {config.num_classes}')

    # ================================================== Training ===============================================
    iteration = 0
    # ****************************************** train ****************************************
    train_iter = iter(train_dataloader)
    model.train()
    while iteration < config.max_iter:
        # Fine-tune with clean data after 4000 iterations
        # if iteration == 4000:
        #     train_data = ContextVB_Dataset(config.train_paths, phase='train', num_classes=config.num_classes,
        #                                    useRGB=config.useRGB, usetrans=False, padding=config.padding,
        #                                    balance=config.data_balance)
        #     train_dataloader = DataLoader(train_data, batch_size=config.batch_size, shuffle=True, num_workers=config.num_workers)
        #     train_iter = iter(train_dataloader)

        try:
            image, label, image_path = next(train_iter)
        except StopIteration:
            # the dataloader is exhausted; restart it for the next pass
            train_iter = iter(train_dataloader)
            image, label, image_path = next(train_iter)

        iteration += 1

        # ------------------------------------ prepare input ------------------------------------
        if config.use_gpu:
            last_image, cur_image, next_image = (image[0].cuda(),
                                                 image[1].cuda(),
                                                 image[2].cuda())
            last_label, cur_label, next_label = (label[0].cuda(),
                                                 label[1].cuda(),
                                                 label[2].cuda())
        else:
            last_image, cur_image, next_image = image[0], image[1], image[2]
            last_label, cur_label, next_label = label[0], label[1], label[2]

        # ---------------------------------- go through the model --------------------------------
        # score = model(last_image, cur_image, next_image)
        score, diff1, diff2 = model(last_image, cur_image, next_image)
        # score, f1, f2, f3 = model(last_image, cur_image, next_image)

        # ----------------------------------- backpropagate -------------------------------------
        # single-branch loss only
        # optimizer.zero_grad()
        # loss = criterion(score, cur_label)
        # # loss = criterion(log_softmax(score, dim=1), cur_label)  # LabelSmoothing
        # loss.backward()
        # optimizer.step()

        # add the regression loss between each pair of branches
        # optimizer.zero_grad()
        # loss = criterion(score, cur_label)
        # # loss = criterion(log_softmax(score, dim=1), cur_label)  # LabelSmoothing
        # mse1_2 = MSELoss(diff1, torch.abs(cur_label - last_label).float())
        # mse2_3 = MSELoss(diff2, torch.abs(cur_label - next_label).float())
        # total_loss = loss + 0.2 * (mse1_2 + mse2_3)
        # total_loss.backward()
        # optimizer.step()

        # self-paced learning + MSE loss between each pair of branches
        if iteration < 500:
            optimizer.zero_grad()
            loss = criterion(score, cur_label)
            # loss = criterion(log_softmax(score, dim=1), cur_label)  # LabelSmoothing
            loss = torch.sum(loss) / config.batch_size
            mse1_2 = MSELoss(diff1, torch.abs(cur_label - last_label).float())
            mse2_3 = MSELoss(diff2, torch.abs(cur_label - next_label).float())
            total_loss = loss + 0.2 * (mse1_2 + mse2_3)
            total_loss.backward()
            optimizer.step()
        else:
            optimizer.zero_grad()
            loss = criterion(score, cur_label)
            # loss = criterion(log_softmax(score, dim=1), cur_label)  # LabelSmoothing
            T = np.percentile(loss.detach().cpu().numpy(), 90)
            loss = torch.where(loss > T, torch.Tensor([0]).cuda(), loss)
            count = torch.sum(
                torch.where(loss > 0,
                            torch.Tensor([1]).cuda(), loss))
            loss = torch.sum(loss) / count
            mse1_2 = MSELoss(diff1, torch.abs(cur_label - last_label).float())
            mse2_3 = MSELoss(diff2, torch.abs(cur_label - next_label).float())
            total_loss = loss + 0.2 * (mse1_2 + mse2_3)
            total_loss.backward()
            optimizer.step()
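        # note added for clarity: the 90th-percentile threshold above zeroes the
        # loss of the hardest ~10% of samples in the batch (the self-paced
        # selection), and `count` renormalizes the mean over the kept samples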

        # mimic the design of a pairwise loss: add an L2 penalty between the features of the two branches
        # optimizer.zero_grad()
        # loss = criterion(score, cur_label)
        # ch_weight12 = torch.where(cur_label == last_label, torch.Tensor([0]).cuda(), torch.Tensor([1]).cuda())
        # ch_weight23 = torch.where(cur_label == next_label, torch.Tensor([0]).cuda(), torch.Tensor([1]).cuda())
        # ch_weight12 = ch_weight12.view(cur_label.size(0), 1, 1, 1)
        # ch_weight23 = ch_weight23.view(cur_label.size(0), 1, 1, 1)
        #
        # mse1_2 = MSELoss(f1 * ch_weight12, f2 * ch_weight12)  # only penalize pairs whose labels differ; same-class pairs are zeroed out
        # mse2_3 = MSELoss(f2 * ch_weight23, f3 * ch_weight23)
        # total_loss = loss + 1 * (mse1_2 + mse2_3)
        # total_loss.backward()
        # optimizer.step()

        # ------------------------------------ record loss ------------------------------------
        loss_meter.add(loss.item())
        mse_meter1_2.add(mse1_2.item())
        mse_meter2_3.add(mse2_3.item())
        total_loss_meter.add(total_loss.item())

        if iteration % config.print_freq == 0:
            tqdm.write(
                f"iter: [{iteration}/{config.max_iter}] {config.save_model_name[:-4]} =================================="
            )

            # *************************************** validate ***************************************
            if config.num_classes == 2:  # binary classification
                model.eval()
                train_cm, train_AUC, train_sp, train_se, train_T, train_accuracy = val_2class(
                    model, train_dataloader, train_dist)
                val_cm, val_AUC, val_sp, val_se, val_T, val_accuracy = val_2class(
                    model, val_dataloader, val_dist)
                model.train()

                # ------------------------------------ save model ------------------------------------
                if val_AUC > previous_AUC:  # save the model whenever the validation AUC improves
                    if config.parallel:
                        if not os.path.exists(
                                os.path.join('checkpoints', save_model_dir,
                                             save_model_name[:-4])):
                            os.makedirs(
                                os.path.join('checkpoints', save_model_dir,
                                             save_model_name[:-4]))
                        model.module.save(
                            os.path.join('checkpoints', save_model_dir,
                                         save_model_name[:-4],
                                         save_model_name))
                    else:
                        if not os.path.exists(
                                os.path.join('checkpoints', save_model_dir,
                                             save_model_name[:-4])):
                            os.makedirs(
                                os.path.join('checkpoints', save_model_dir,
                                             save_model_name[:-4]))
                        model.save(
                            os.path.join('checkpoints', save_model_dir,
                                         save_model_name[:-4],
                                         save_model_name))
                    previous_AUC = val_AUC
                    save_iter = iteration

                # ---------------------------------- record and print ---------------------------------
                process_record['loss'].append(loss_meter.value()[0])
                process_record['mse'].append(mse_meter1_2.value()[0] +
                                             mse_meter2_3.value()[0])
                process_record['train_avg'].append((train_sp + train_se) / 2)
                process_record['train_sp'].append(train_sp)
                process_record['train_se'].append(train_se)
                process_record['train_AUC'].append(train_AUC)
                process_record['val_avg'].append((val_sp + val_se) / 2)
                process_record['val_sp'].append(val_sp)
                process_record['val_se'].append(val_se)
                process_record['val_AUC'].append(val_AUC)

                # vis.plot_many({'loss': loss_meter.value()[0],
                #                'train_avg': (train_sp + train_se) / 2, 'train_sp': train_sp, 'train_se': train_se,
                #                'val_avg': (val_sp + val_se) / 2, 'val_sp': val_sp, 'val_se': val_se,
                #                'train_AUC': train_AUC, 'val_AUC': val_AUC})
                # vis.log(f"iter: [{iteration}/{config.max_iter}] =========================================")
                # vis.log(f"lr: {optimizer.param_groups[0]['lr']}, loss: {round(loss_meter.value()[0], 5)}")
                # vis.log(f"train_avg: {round((train_sp + train_se) / 2, 4)}, train_sp: {round(train_sp, 4)}, train_se: {round(train_se, 4)}")
                # vis.log(f"val_avg: {round((val_sp + val_se) / 2, 4)}, val_sp: {round(val_sp, 4)}, val_se: {round(val_se, 4)}")
                # vis.log(f'train_AUC: {train_AUC}')
                # vis.log(f'val_AUC: {val_AUC}')
                # vis.log(f'train_cm: {train_cm}')
                # vis.log(f'val_cm: {val_cm}')
                print("lr:", optimizer.param_groups[0]['lr'], "loss:",
                      round(loss_meter.value()[0], 5))
                print('train_avg:', round((train_sp + train_se) / 2, 4),
                      'train_sp:', round(train_sp, 4), 'train_se:',
                      round(train_se, 4))
                print('val_avg:', round((val_sp + val_se) / 2, 4), 'val_sp:',
                      round(val_sp, 4), 'val_se:', round(val_se, 4))
                print('train_AUC:', train_AUC, 'val_AUC:', val_AUC)
                print('train_cm:')
                print(train_cm)
                print('val_cm:')
                print(val_cm)

            elif config.num_classes == 3:  # 3-class classification
                model.eval()
                train_cm, train_mAP, train_sp, train_se, train_mAUC, train_accuracy = val_3class(
                    model, train_dataloader, train_data_scale)
                val_cm, val_mAP, val_sp, val_se, val_mAUC, val_accuracy = val_3class(
                    model, val_dataloader, val_data_scale)
                model.train()

                # ------------------------------------ save model ------------------------------------
                if val_mAP > previous_mAP:  # save the model whenever the validation mAP improves
                    if config.parallel:
                        if not os.path.exists(
                                os.path.join('checkpoints', save_model_dir,
                                             save_model_name[:-4])):
                            os.makedirs(
                                os.path.join('checkpoints', save_model_dir,
                                             save_model_name[:-4]))
                        model.module.save(
                            os.path.join('checkpoints', save_model_dir,
                                         save_model_name[:-4],
                                         save_model_name))
                    else:
                        if not os.path.exists(
                                os.path.join('checkpoints', save_model_dir,
                                             save_model_name[:-4])):
                            os.makedirs(
                                os.path.join('checkpoints', save_model_dir,
                                             save_model_name[:-4]))
                        model.save(
                            os.path.join('checkpoints', save_model_dir,
                                         save_model_name[:-4],
                                         save_model_name))
                    previous_mAP = val_mAP
                    save_iter = iteration

                # ---------------------------------- record and print ---------------------------------
                process_record['loss'].append(loss_meter.value()[0])
                process_record['mse'].append(mse_meter1_2.value()[0] +
                                             mse_meter2_3.value()[0])
                process_record['train_sp0'].append(train_sp[0])
                process_record['train_se0'].append(train_se[0])
                process_record['train_sp1'].append(train_sp[1])
                process_record['train_se1'].append(train_se[1])
                process_record['train_sp2'].append(train_sp[2])
                process_record['train_se2'].append(train_se[2])
                process_record['train_mAUC'].append(float(train_mAUC))
                process_record['train_mAP'].append(float(train_mAP))
                process_record['val_sp0'].append(val_sp[0])
                process_record['val_se0'].append(val_se[0])
                process_record['val_sp1'].append(val_sp[1])
                process_record['val_se1'].append(val_se[1])
                process_record['val_sp2'].append(val_sp[2])
                process_record['val_se2'].append(val_se[2])
                process_record['val_mAUC'].append(float(val_mAUC))
                process_record['val_mAP'].append(float(val_mAP))

                # vis.plot_many({'mse1': mse_meter1_2.value()[0], 'mse2': mse_meter2_3.value()[0],
                #                'total_loss': total_loss_meter.value()[0]})
                # vis.plot_many({'loss': loss_meter.value()[0],
                #                'train_sp0': train_sp[0], 'train_sp1': train_sp[1], 'train_sp2': train_sp[2],
                #                'train_se0': train_se[0], 'train_se1': train_se[1], 'train_se2': train_se[2],
                #                'val_sp0': val_sp[0], 'val_sp1': val_sp[1], 'val_sp2': val_sp[2],
                #                'val_se0': val_se[0], 'val_se1': val_se[1], 'val_se2': val_se[2],
                #                'train_mAP': train_mAP, 'val_mAP': val_mAP})
                # vis.log(f"iter: [{iteration}/{config.max_iter}] =========================================")
                # vis.log(f"lr: {optimizer.param_groups[0]['lr']}, loss: {round(loss_meter.value()[0], 5)}")
                # vis.log(f"train_sp0: {round(train_sp[0], 4)}, train_sp1: {round(train_sp[1], 4)}, train_sp2: {round(train_sp[2], 4)}")
                # vis.log(f"train_se0: {round(train_se[0], 4)}, train_se1: {round(train_se[1], 4)}, train_se2: {round(train_se[2], 4)}")
                # vis.log(f"val_sp0: {round(val_sp[0], 4)}, val_sp1: {round(val_sp[1], 4)}, val_sp2: {round(val_sp[2], 4)}")
                # vis.log(f"val_se0: {round(val_se[0], 4)}, val_se1: {round(val_se[1], 4)}, val_se2: {round(val_se[2], 4)}")
                # vis.log(f"train_mAP: {train_mAP}, val_mAP: {val_mAP}")
                # vis.log(f'train_cm: {train_cm}')
                # vis.log(f'val_cm: {val_cm}')
                # print("lr:", optimizer.param_groups[0]['lr'], "loss:", round(loss_meter.value()[0], 5))
                print(
                    "lr:", optimizer.param_groups[0]['lr'], "loss:",
                    round(loss_meter.value()[0], 5), "mse:",
                    round(mse_meter1_2.value()[0] + mse_meter2_3.value()[0],
                          5))
                print('train_sp0:', round(train_sp[0], 4), 'train_sp1:',
                      round(train_sp[1], 4), 'train_sp2:',
                      round(train_sp[2], 4))
                print('train_se0:', round(train_se[0], 4), 'train_se1:',
                      round(train_se[1], 4), 'train_se2:',
                      round(train_se[2], 4))
                print('val_sp0:', round(val_sp[0], 4), 'val_sp1:',
                      round(val_sp[1], 4), 'val_sp2:', round(val_sp[2], 4))
                print('val_se0:', round(val_se[0], 4), 'val_se1:',
                      round(val_se[1], 4), 'val_se2:', round(val_se[2], 4))
                print('mSP:', round(sum(val_sp) / 3, 5), 'mSE:',
                      round(sum(val_se) / 3, 5))
                print('train_mAUC:', train_mAUC, 'val_mAUC:', val_mAUC)
                print('train_mAP:', train_mAP, 'val_mAP:', val_mAP)
                print('train_cm:')
                print(train_cm)
                print('val_cm:')
                print(val_cm)
                print('Best mAP:', previous_mAP)

            loss_meter.reset()
            mse_meter1_2.reset()
            mse_meter2_3.reset()
            total_loss_meter.reset()

        # ------------------------------------ save record ------------------------------------
        if os.path.exists(
                os.path.join('checkpoints', save_model_dir,
                             save_model_name.split('.')[0])):
            write_json(file=os.path.join('checkpoints', save_model_dir,
                                         save_model_name.split('.')[0],
                                         'process_record.json'),
                       content=process_record)

    # vis.log(f"Best Iter: {save_iter}")
    print("Best Iter:", save_iter)