Example #1
def inference(args, model, test_save_path=None):
    from datasets.dataset_HuBMAP import HuBMAP_dataset, Generator
    db_test = HuBMAP_dataset(base_dir=args.root_path, split="test", list_dir=args.list_dir,
                             transform=transforms.Compose([Generator(output_size=[args.img_size, args.img_size])]))
    # `batch_size` was undefined in this scope; batch size 1 (and no shuffling) is assumed for testing
    testloader = DataLoader(db_test, batch_size=1, shuffle=False, num_workers=1)
    logging.info("{} test iterations per epoch".format(len(testloader)))
    model.eval()
    metric_list = 0.0
    ### Accumulate test-set losses
    total_test_loss = 0
    total_test_dice_loss = 0
    batch_num = 0
    label_batch_sum = 0
    ce_loss = CrossEntropyLoss()
    num_classes = args.num_classes
    dice_loss = DiceLoss(num_classes)
    for i_batch, sampled_batch in enumerate(testloader):
        print(" testing progress: {:.2f}".format(batch_num/len(testloader)*100) + "%", end="\r")
        model.eval()
        image_batch, label_batch = sampled_batch['image'], sampled_batch['label']
        image_batch, label_batch = image_batch.cuda(), label_batch.cuda()
        # Save the batch with the largest labeled (foreground) area for later visualization
        label_sum = np.sum(label_batch.detach().cpu().numpy())
        outputs = model(image_batch)
        if label_sum > label_batch_sum:
            label_batch_sum = label_sum
            np.save('test_pred.npy', outputs.detach().cpu().numpy())
            np.save('test_img.npy', image_batch.detach().cpu().numpy())
            np.save('test_label.npy', label_batch.detach().cpu().numpy())
        
        loss_ce = ce_loss(outputs, label_batch.long())
        loss_dice = dice_loss(outputs, label_batch, softmax=True)
        loss = 0.5 * loss_ce + 0.5 * loss_dice


        ###
        total_test_loss += loss.item()
        total_test_dice_loss += loss_dice.item()
        ###

        batch_num = batch_num + 1

    avg_test_loss = total_test_loss / batch_num
    avg_test_loss_dice = total_test_dice_loss / batch_num
    # `snapshot_path` was undefined in this scope; the `test_save_path` argument is assumed here
    writer = SummaryWriter((test_save_path or '.') + '/log')
    writer.add_scalar('info/avg_test_loss', avg_test_loss)
    writer.add_scalar('info/avg_test_loss_dice', avg_test_loss_dice)
    writer.close()
    logging.info('test_loss : %f, test_loss_dice: %f' % (avg_test_loss, avg_test_loss_dice))


    ###
    return "Testing Finished!"
Example #2
def __init__(self, n_channels, n_classes):
    super(Unet, self).__init__()
    self.criterion = DiceLoss()
    self.inc = InConv(n_channels, 64)
    self.down1 = Down(64, 128)
    self.down2 = Down(128, 256)
    self.down3 = Down(256, 512)
    self.down4 = Down(512, 512)
    self.up1 = Up(1024, 256)
    self.up2 = Up(512, 128)
    self.up3 = Up(256, 64)
    self.up4 = Up(128, 64)
    self.outc = OutConv(64, n_classes)
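
The excerpt stops at __init__, but the channel sizes pin down the wiring: down4 outputs 512 channels, and Up(1024, 256) only makes sense if each Up block concatenates the upsampled features with the matching encoder skip connection (512 + 512 = 1024, and so on down the decoder). A forward pass consistent with those sizes would be the following sketch (assumed, not the original code):

def forward(self, x):
    # Encoder
    x1 = self.inc(x)      # 64 channels
    x2 = self.down1(x1)   # 128
    x3 = self.down2(x2)   # 256
    x4 = self.down3(x3)   # 512
    x5 = self.down4(x4)   # 512
    # Decoder: each Up upsamples and concatenates the skip connection,
    # so up1 sees 512 + 512 = 1024 input channels
    x = self.up1(x5, x4)  # -> 256
    x = self.up2(x, x3)   # -> 128
    x = self.up3(x, x2)   # -> 64
    x = self.up4(x, x1)   # -> 64
    return self.outc(x)   # -> n_classes logits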
Example #3
def get_compiled(loss={
    "clf": 'categorical_crossentropy',
    "seg": DiceLoss()
},
                 optimizer='adam',
                 metrics={
                     'clf': ['categorical_accuracy', precision, recall],
                     'seg': [precision, recall]
                 },
                 loss_weights={
                     "clf": 1.,
                     "seg": .2
                 },
                 weights=None,
                 **kwargs):
    model = get_model(weights=weights, **kwargs)
    model.compile(loss=loss,
                  optimizer=optimizer,
                  metrics=metrics,
                  loss_weights=loss_weights)
    return model
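
The loss, metrics, and loss_weights dicts key on the model's two named outputs, "clf" and "seg", so Keras pairs categorical cross-entropy with the classification head and DiceLoss (down-weighted to 0.2) with the segmentation head. A hypothetical call overriding a few defaults (the checkpoint path is illustrative, not from the repo):

model = get_compiled(optimizer='rmsprop',
                     loss_weights={"clf": 1., "seg": .5},
                     weights='checkpoints/last.h5')  # extra kwargs are forwarded to get_model

Note that the dict defaults are evaluated once and shared across calls; that is harmless here only because they are never mutated.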
Example #4
def main(args):
    #################### init logger ###################################
    log_dir = './eval/{}/{}'.format(args.dataset, args.model)
    logger = get_logger(log_dir)
    print('RUNDIR: {}'.format(log_dir))
    logger.info('{}-Eval'.format(args.model))
    # setting
    args.save_path = log_dir
    args.save_images = os.path.join(args.save_path, "images")
    if not os.path.exists(args.save_images):
        os.mkdir(args.save_images)
    ##################### init device #################################
    if args.manualSeed is None:
        args.manualSeed = random.randint(1, 10000)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    args.use_cuda = args.gpus > 0 and torch.cuda.is_available()
    args.device = torch.device('cuda' if args.use_cuda else 'cpu')
    if args.use_cuda:
        torch.cuda.manual_seed(args.manualSeed)
        cudnn.benchmark = True
    ####################### init dataset ###########################################
    val_loader = get_dataloder(args, split_flag="valid")

    ######################## init model ############################################
    if args.model == "layer7_double_deep_ep1600_8lr4e-3":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'layer7_double_deep'
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=9,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)
        args.model_path = './logs/cvc/layer7_double_deep_ep1600_8lr4e-3/model_best.pth.tar'
        model.load_state_dict(
            torch.load(args.model_path, map_location='cpu')['state_dict'])

    elif args.model == "alpha0_double_deep":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'alpha0_stage1_double_deep_ep200'
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)
        args.model_path = './logs/cvc/alpha0_8lr4e-3/model_best.pth.tar'
        state_dict = torch.load(args.model_path,
                                map_location='cpu')['state_dict']
        state_dict = remove_module(state_dict)
        model.load_state_dict(state_dict)

    elif args.model == "alpha0_5_double_deep":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'alpha0_5_stage1_double_deep_ep80'
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)
        args.model_path = './logs/cvc/alpha0_5_8lr4e-3/model_best.pth.tar'
        state_dict = torch.load(args.model_path,
                                map_location='cpu')['state_dict']
        state_dict = remove_module(state_dict)
        model.load_state_dict(state_dict)

    elif args.model == "alpha1_double_deep":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'alpha0_5_stage1_double_deep_ep80'
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)
        args.model_path = './logs/cvc/alpha1_8lr4e-3/model_best.pth.tar'
        state_dict = torch.load(args.model_path,
                                map_location='cpu')['state_dict']
        state_dict = remove_module(state_dict)
        model.load_state_dict(state_dict)

    else:
        raise NotImplementedError()

    setting = {k: v for k, v in args._get_kwargs()}
    logger.info(setting)
    logger.info(genotype)
    logger.info('param size = %fMB', calc_parameters_count(model))
    # init loss
    if args.loss == 'bce':
        criterion = nn.BCELoss()
    elif args.loss == 'bcelog':
        criterion = nn.BCEWithLogitsLoss()
    elif args.loss == "dice":
        criterion = DiceLoss()
    elif args.loss == "softdice":
        criterion = SoftDiceLoss()
    elif args.loss == 'bcedice':
        criterion = BCEDiceLoss()
    else:
        criterion = nn.CrossEntropyLoss()
    if args.use_cuda:
        logger.info("load model and criterion to gpu !")
    model = model.to(args.device)
    criterion = criterion.to(args.device)

    infer(args, model, criterion, val_loader, logger, args.save_images)
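
The if/elif chain that selects the criterion here recurs verbatim in several of the examples below; a table-driven version (a sketch, not from the repo) would remove the duplication:

# Hypothetical refactor of the repeated loss selection
LOSSES = {
    'bce': nn.BCELoss,
    'bcelog': nn.BCEWithLogitsLoss,
    'dice': DiceLoss,
    'softdice': SoftDiceLoss,
    'bcedice': BCEDiceLoss,
}

def build_criterion(name):
    # Fall back to cross-entropy, matching the original else branch
    return LOSSES.get(name, nn.CrossEntropyLoss)()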
Example #5
import torch

from torchmeta.utils.data import BatchMetaDataLoader

from maml import ModelAgnosticMetaLearning
from data import get_datasets
from models import Unet, ResUnet, FCN8
from utils import FocalLoss, BCEDiceFocalLoss, plot_errors, plot_accuracy, plot_iou, DiceLoss

import math, time
import json, os, logging

download_data = True  # Download data to local file (won't download if already there)
bce_dice_focal = False  # If True, adjusts y_lim in error plot
augment = True  # Use data augmentation

#loss_function = torch.nn.BCEWithLogitsLoss()
loss_function = DiceLoss()
"""not working:"""
#loss_function = torch.nn.CrossEntropyLoss()
#loss_function = FocalLoss()
#loss_function = BCEDiceFocalLoss()
#bce_dice_focal = True


def main(args):

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
    device = torch.device(
        'cuda' if args.use_cuda and torch.cuda.is_available() else 'cpu')

    # Create output folder
    if (args.output_folder is not None):
Example #6
import torch
from torch.nn.utils import clip_grad_norm
import torch.nn.functional as F

import time
import os
import ipdb

from utils import AverageMeter, calculate_accuracy, f1_score, fuse_2d, grad_cam, show_cam_on_image, ModelOutputs
import numpy as np
from sklearn.metrics import average_precision_score, roc_auc_score
from utils import DiceLoss

from apex import amp

dice_loss = DiceLoss()


def train_epoch(epoch, data_loader, model, criterion, optimizer, opt, logger):
    print('train at epoch {}'.format(epoch))

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    # for tsn
    if opt.model_type == 'tsn':
        if opt.no_partialbn:
            model.module.partialBN(False)
        else:
            model.module.partialBN(True)
Example #7
File: train.py  Project: tadeephuy/UDA
    # else:
    #     model.apply(weights_init)
    ####################

    #### SE-Densenet 121 ####
    from densenet import se_densenet121
    model = se_densenet121(pretrained=False, num_channels=1, num_classes=5)
    if opt.weight_dir:
        model.load_state_dict(torch.load(opt.weight_dir))
        print('weight loaded')
    #########################
    model = model.to(device)

    # Optimization
    cross_entropy = nn.BCEWithLogitsLoss().to(device)  # supervised loss
    dice_loss = DiceLoss().to(device)
    auc_loss = AUCLoss().to(device)
    kl_divergence = nn.KLDivLoss(reduction='batchmean').to(
        device)  # unsupervised loss (consistency loss)
    # proxy_w*(ce_w*ce_loss + d_w*d_loss) + a_w*a_loss
    proxy_weight, ce_weight, d_weight, a_weight = 1.5, 0.8, 0.0, 1.0
    if uda:
        supervised_weight, unsupervised_weight = 1.0, 5.0
    else:
        supervised_weight, unsupervised_weight = 1.0, 0.0

    optimizer = optim.SGD(model.parameters(),
                          lr=opt.lr,
                          momentum=0.99,
                          nesterov=True,
                          weight_decay=5e-4)
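
The excerpt ends before the training loop, but the weight comment above pins down how the terms combine. A sketch of the per-batch objective (tensor names such as logits, student_logits, and teacher_logits are assumed, not from the repo; F is torch.nn.functional):

ce = cross_entropy(logits, labels)            # supervised BCE-with-logits term
d = dice_loss(torch.sigmoid(logits), labels)  # d_weight is 0.0 here, i.e. disabled
a = auc_loss(logits, labels)
supervised = proxy_weight * (ce_weight * ce + d_weight * d) + a_weight * a
consistency = kl_divergence(F.log_softmax(student_logits, dim=1),
                            F.softmax(teacher_logits.detach(), dim=1))
loss = supervised_weight * supervised + unsupervised_weight * consistency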
Example #8
def main(args):

    #args.model_list=['alpha0_double_deep_0.01','alpha0_5_double_deep_0.01','alpha1_double_deep_0.01','nodouble_deep','slim_dd','slim_double','slim_nodouble','slim_nodouble_deep']
    #args.model_list=["double_deep","nodouble_deep","slim_nodouble"]
    #args.model_list=["slim_nodouble_deep_init32"]
    #args.model_list=["slim_nodouble_deep_init48"]
    args.model_list = [
        'alpha0_double_deep_0.01', 'alpha0_5_double_deep_0.01',
        'alpha1_double_deep_0.01'
    ]

    for model_name in args.model_list:
        if model_name == "alpha0_double_deep_0.01":
            args.deepsupervision = True
            args.double_down_channel = True
            args.genotype_name = 'alpha0_stage1_double_deep_ep200'
            genotype = eval('genotypes.%s' % args.genotype_name)
            model = BuildNasUnetPrune(
                genotype=genotype,
                input_c=args.in_channels,
                c=args.init_channels,
                num_classes=args.nclass,
                meta_node_num=args.middle_nodes,
                layers=args.layers,
                dp=args.dropout_prob,
                use_sharing=args.use_sharing,
                double_down_channel=args.double_down_channel,
                aux=args.aux)
            args.model_path = './logs/isic2018/alpha0_double_deep_0.01/model_best.pth.tar'
            # kwargs = {'map_location': lambda storage, loc: storage.cuda(0)}
            # state_dict = torch.load(args.model_path, **kwargs)
            # # create new OrderedDict that does not contain `module.`
            # model.load_state_dict(state_dict)

            state_dict = torch.load(args.model_path,
                                    map_location='cpu')['state_dict']
            state_dict = remove_module(state_dict)
            model.load_state_dict(state_dict)

        elif model_name == "alpha0_5_double_deep_0.01":
            args.deepsupervision = True
            args.double_down_channel = True
            args.genotype_name = 'alpha0_5_stage1_double_deep_ep80'
            genotype = eval('genotypes.%s' % args.genotype_name)
            model = BuildNasUnetPrune(
                genotype=genotype,
                input_c=args.in_channels,
                c=args.init_channels,
                num_classes=args.nclass,
                meta_node_num=args.middle_nodes,
                layers=args.layers,
                dp=args.dropout_prob,
                use_sharing=args.use_sharing,
                double_down_channel=args.double_down_channel,
                aux=args.aux)
            args.model_path = './logs/isic2018/alpha0_5_double_deep_0.01/model_best.pth.tar'
            state_dict = torch.load(args.model_path,
                                    map_location='cpu')['state_dict']
            state_dict = remove_module(state_dict)
            model.load_state_dict(state_dict)
            #model.load_state_dict(torch.load(args.model_path, map_location='cpu')['state_dict'])

        elif model_name == "alpha1_double_deep_0.01":
            args.deepsupervision = True
            args.double_down_channel = True
            args.genotype_name = 'alpha1_stage1_double_deep_ep200'
            genotype = eval('genotypes.%s' % args.genotype_name)
            model = BuildNasUnetPrune(
                genotype=genotype,
                input_c=args.in_channels,
                c=args.init_channels,
                num_classes=args.nclass,
                meta_node_num=args.middle_nodes,
                layers=args.layers,
                dp=args.dropout_prob,
                use_sharing=args.use_sharing,
                double_down_channel=args.double_down_channel,
                aux=args.aux)
            args.model_path = './logs/isic2018/alpha1_double_deep_0.01/model_best.pth.tar'
            state_dict = torch.load(args.model_path,
                                    map_location='cpu')['state_dict']
            state_dict = remove_module(state_dict)
            model.load_state_dict(state_dict)

            #model.load_state_dict(torch.load(args.model_path, map_location='cpu')['state_dict'])

        #################### init logger ###################################
        log_dir = './eval/{}/{}'.format(args.dataset, model_name)
        ##################### init model ########################################
        logger = get_logger(log_dir)
        print('RUNDIR: {}'.format(log_dir))
        logger.info('{}-Eval'.format(model_name))
        # setting
        args.save_path = log_dir
        args.save_images = os.path.join(args.save_path, "images")
        if not os.path.exists(args.save_images):
            os.mkdir(args.save_images)
        ##################### init device #################################
        if args.manualSeed is None:
            args.manualSeed = random.randint(1, 10000)
        np.random.seed(args.manualSeed)
        torch.manual_seed(args.manualSeed)
        args.use_cuda = args.gpus > 0 and torch.cuda.is_available()
        args.device = torch.device('cuda' if args.use_cuda else 'cpu')
        if args.use_cuda:
            torch.cuda.manual_seed(args.manualSeed)
            cudnn.benchmark = True
        ####################### init dataset ###########################################
        # sorted vaild datasets
        val_loader = get_dataloder(args, split_flag="valid")
        setting = {k: v for k, v in args._get_kwargs()}
        logger.info(setting)
        logger.info(genotype)
        logger.info('param size = %fMB', calc_parameters_count(model))

        # init loss
        if args.loss == 'bce':
            criterion = nn.BCELoss()
        elif args.loss == 'bcelog':
            criterion = nn.BCEWithLogitsLoss()
        elif args.loss == "dice":
            criterion = DiceLoss()
        elif args.loss == "softdice":
            criterion = SoftDiceLoss()
        elif args.loss == 'bcedice':
            criterion = BCEDiceLoss()
        else:
            criterion = nn.CrossEntropyLoss()
        if args.use_cuda:
            logger.info("load model and criterion to gpu !")
        model = model.to(args.device)
        criterion = criterion.to(args.device)
        infer(args, model, criterion, val_loader, logger, args.save_images)
Example #9
def main(train_args):
    backbone = ResNet()
    backbone.load_state_dict(torch.load(
        './weight/resnet34-333f7ec4.pth'), strict=False)
    net = Decoder34(num_classes=13, backbone=backbone).cuda()
    D = discriminator(input_channels=16).cuda()
    if len(train_args['snapshot']) == 0:
        curr_epoch = 1
        train_args['best_record'] = {
            'epoch': 0, 'val_loss': 1e10, 'acc': 0, 'acc_cls': 0, 'mean_iu': 0, 'fwavacc': 0}
    else:
        print('training resumes from ' + train_args['snapshot'])
        net.load_state_dict(torch.load(os.path.join(
            ckpt_path, exp_name, train_args['snapshot'])))
        split_snapshot = train_args['snapshot'].split('_')
        curr_epoch = int(split_snapshot[1]) + 1
        train_args['best_record'] = {'epoch': int(split_snapshot[1]), 'val_loss': float(split_snapshot[3]),
                                     'acc': float(split_snapshot[5]), 'acc_cls': float(split_snapshot[7]),
                                     'mean_iu': float(split_snapshot[9]), 'fwavacc': float(split_snapshot[11])}

    net.train()
    D.train()

    mean_std = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    input_transform = standard_transforms.Compose([
        standard_transforms.ToTensor(),
        standard_transforms.Normalize(*mean_std)
    ])
    target_transform = extended_transforms.MaskToTensor()
    restore_transform = standard_transforms.Compose([
        extended_transforms.DeNormalize(*mean_std),
        standard_transforms.ToPILImage(),
    ])
    visualize = standard_transforms.Compose([
        standard_transforms.Scale(400),
        standard_transforms.CenterCrop(400),
        standard_transforms.ToTensor()
    ])

    train_set = wp.Wp('train', transform=input_transform,
                      target_transform=target_transform)
    train_loader = DataLoader(train_set, batch_size=4,
                              num_workers=4, shuffle=True)
    # val_set = wp.Wp('val', transform=input_transform,
    #                 target_transform=target_transform)
    # XR: so val was never usable here anyway? Why not use a separate val dataset here?
    val_loader = DataLoader(train_set, batch_size=1,
                            num_workers=4, shuffle=False)
    criterion = DiceLoss().cuda()
    criterion_D = nn.BCELoss().cuda()
    optimizer_AE = optim.Adam([
        {'params': [param for name, param in net.named_parameters() if name[-4:] == 'bias'],
         'lr': 2 * train_args['lr']},
        {'params': [param for name, param in net.named_parameters() if name[-4:] != 'bias'],
         'lr': train_args['lr'], 'weight_decay': train_args['weight_decay']}
    ], betas=(train_args['momentum'], 0.999))
    optimizer_D = optim.Adam([
        {'params': [param for name, param in D.named_parameters() if name[-4:] == 'bias'],
         'lr': 2 * train_args['lr']},
        {'params': [param for name, param in D.named_parameters() if name[-4:] != 'bias'],
         'lr': train_args['lr'], 'weight_decay': train_args['weight_decay']}
    ], betas=(train_args['momentum'], 0.999))

    if len(train_args['snapshot']) > 0:
        optimizer_AE.load_state_dict(torch.load(os.path.join(
            ckpt_path, exp_name, 'opt_' + train_args['snapshot'])))
        optimizer_AE.param_groups[0]['lr'] = 2 * train_args['lr']
        optimizer_AE.param_groups[1]['lr'] = train_args['lr']

    check_mkdir(ckpt_path)
    check_mkdir(os.path.join(ckpt_path, exp_name))
    with open(os.path.join(ckpt_path, exp_name, str(datetime.datetime.now()) + '.txt'), 'w') as f:
        f.write(str(train_args) + '\n\n')

    scheduler = ReduceLROnPlateau(
        optimizer_AE, 'min', patience=train_args['lr_patience'], min_lr=1e-10, verbose=True)
    for epoch in range(curr_epoch, train_args['epoch_num'] + 1):
        train(train_loader, net, D, criterion, criterion_D, optimizer_AE,
              optimizer_D, epoch, train_args)
        val_loss = validate(val_loader, net, criterion, optimizer_AE,
                            epoch, train_args, restore_transform, visualize)
        scheduler.step(val_loss)
Example #10
def main(args):
    #################### init logger ###################################
    args.model='unet'
    model_weight_path='../logs/isic2018/unet_ep300/20200402-135108/model_best.pth.tar'
    model=get_models(args)
    model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict'])

    log_dir = './models/' + args.model+'_prune_'+args.note
    logger = get_logger(log_dir)
    print('RUNDIR: {}'.format(log_dir))
    logger.info('{}-L1Prune'.format(args.model))
    # setting
    args.save_path = log_dir
    args.save_tbx_log = args.save_path + '/tbx_log'
    writer = SummaryWriter(args.save_tbx_log)

    if args.manualSeed is None:
        args.manualSeed = random.randint(1, 10000)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)

    args.use_cuda = args.gpus > 0 and torch.cuda.is_available()
    args.device = torch.device('cuda' if args.use_cuda else 'cpu')
    if args.use_cuda:
        torch.cuda.manual_seed(args.manualSeed)
        cudnn.benchmark = True

    setting = {k: v for k, v in args._get_kwargs()}
    logger.info(setting)

    train_loader=get_dataloder(args,split_flag="train")
    val_loader=get_dataloder(args,split_flag="valid")


    # init loss
    if args.loss == 'bce':
        criterion = nn.BCELoss()
    elif args.loss == 'bcelog':
        criterion = nn.BCEWithLogitsLoss()
    elif args.loss == "dice":
        criterion = DiceLoss()
    elif args.loss == "softdice":
        criterion = SoftDiceLoss()
    elif args.loss == 'bcedice':
        criterion = BCEDiceLoss()
    else:
        criterion = nn.CrossEntropyLoss()
    if args.use_cuda:
        logger.info("load model and criterion to gpu !")
    model = model.to(args.device)
    criterion = criterion.to(args.device)

    logger.info("Original trained model performance test: ")
    infer(args, model, criterion, val_loader,logger)

    # Pruning
    # Pruning Configuration, in paper 'PRUNING FILTERS FOR EFFICIENT CONVNETS',
    configure_list = [{
        'sparsity': 0.5,
        'op_types': ['Conv2d'],
        'op_names': ['Conv1.conv.0','Conv1.conv.3','Conv2.conv.0','Conv2.conv.3','Conv3.conv.0','Conv3.conv.3',
                     'Conv4.conv.0','Conv4.conv.3','Conv5.conv.0','Conv5.conv.3',
                     'Up5.up.1','Up_conv5.conv.0','Up_conv5.conv.3',
                     'Up4.up.1','Up_conv4.conv.0','Up_conv4.conv.3',
                     'Up3.up.1','Up_conv3.conv.0','Up_conv3.conv.3',
                     'Up2.up.1','Up_conv2.conv.0','Up_conv2.conv.3',
                     ]}
    ]
    # Prune model and test accuracy without fine tuning.
    logger.info('=' * 10 + 'Test on the pruned model before fine tune' + '=' * 10)
    pruner = L1FilterPruner(model, configure_list)

    # change the forward func (mul pruning mask )
    model = pruner.compress()

    # test performance without finetuning
    logger.info("Pruning trained model performance test: ")
    infer(args, model, criterion, val_loader,logger)

    # Fine tune the pruned model for 40 epochs and test accuracy
    logger.info('=' * 10 + 'Fine tuning' + '=' * 10)
    #torch.optim.SGD(parametetrs,lr=args.lr,weight_decay=args.weight_decay,momentum=args.momentum)
    optimizer=torch.optim.SGD(model.parameters(),lr=args.lr,weight_decay=args.weight_decay,momentum=args.momentum)
    # init schedulers  Steplr
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,args.epoch)

    max_value = 0
    for epoch in range(0, args.epoch):
        logger.info('Epoch: %d lr %e', epoch, scheduler.get_last_lr()[0])
        # update the pruning mask for this epoch
        pruner.update_epoch(epoch)
        # train, then step the LR scheduler once per epoch (after the optimizer steps)
        train(args, model, criterion, train_loader, optimizer, epoch, logger)
        scheduler.step()
        # val
        vmr, vms, vmp, vmf, vmjc, vmd, vmacc,vloss = infer(args, model, criterion, val_loader,logger)

        writer.add_scalar('Val/Loss', vloss, epoch)
        writer.add_scalar('Val/mAcc', vmacc, epoch)
        writer.add_scalar('Val/Recall', vmr, epoch)
        writer.add_scalar('Val/Specifi', vms, epoch)
        writer.add_scalar('Val/Precision', vmp, epoch)
        writer.add_scalar('Val/F1', vmf, epoch)
        writer.add_scalar('Val/Jc', vmjc, epoch)
        writer.add_scalar('Val/Dice', vmd, epoch)

        is_best = vmjc >= max_value
        max_value = max(max_value, vmjc)
        if is_best:
            pruner.export_model(model_path=os.path.join(args.save_path,"best_prune_unet.pth"), mask_path=os.path.join(args.save_path,'mask_prune_indexs.pth'))
        state = {
            'epoch': epoch,
            'optimizer': optimizer.state_dict(),
            'state_dict': model.state_dict(),
            'scheduler': scheduler.state_dict(),
        }
        logger.info("epoch:{} best:{} max_value:{}".format(epoch, is_best, max_value))
        torch.save(state, os.path.join(args.save_path, "checkpoint.pth.tar"))
    writer.close()

    # test the best_prune_unet.pth
    args.model='unet'
    model_weight_path=os.path.join(args.save_path,"best_prune_unet.pth")
    model=get_models(args)
    model.load_state_dict(torch.load(model_weight_path, map_location='cpu'))
    model = model.to(args.device)
    logger.info("Final saved pruned  model performance test: ")
    infer(args, model, criterion, val_loader,logger)
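
The configure_list above asks for 50% sparsity on the named Conv2d layers; the cited paper ranks each layer's filters by the L1 norm of their weights and prunes the smallest first. NNI's L1FilterPruner applies that rule internally, so the helper below is purely illustrative:

import torch

def l1_filter_scores(conv_weight):
    # conv_weight: (out_channels, in_channels, kH, kW); one score per output filter
    return conv_weight.abs().sum(dim=(1, 2, 3))

# With sparsity 0.5, the half of each layer's filters with the smallest
# scores is masked out.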
Example #11
def main(args):

    args.model_list = [
        'double_deep', 'double', 'nodouble', 'nodouble_deep', 'slim_dd',
        'slim_double', 'slim_nodouble', 'slim_nodouble_deep'
    ]
    #args.model_list=["slim_nodouble_deep_init32"]
    for model_name in args.model_list:
        print(model_name)
        if model_name == "double_deep":
            args.deepsupervision = True
            args.double_down_channel = True
            args.genotype_name = 'stage1_layer9_110epoch_double_deep_final'
            genotype = eval('genotypes.%s' % args.genotype_name)
            model = BuildNasUnetPrune(
                genotype=genotype,
                input_c=args.in_channels,
                c=args.init_channels,
                num_classes=args.nclass,
                meta_node_num=args.middle_nodes,
                layers=args.layers,
                dp=args.dropout_prob,
                use_sharing=args.use_sharing,
                double_down_channel=args.double_down_channel,
                aux=args.aux)
            args.model_path = './logs/isic2018/prune_20200313-063406_32_32_ep300_double_deep/model_best.pth.tar'
            model.load_state_dict(
                torch.load(args.model_path, map_location='cpu')['state_dict'])

        elif model_name == 'double':
            args.deepsupervision = False
            args.double_down_channel = True
            args.genotype_name = 'stage1_layer9_110epoch_double_final'
            genotype = eval('genotypes.%s' % args.genotype_name)
            model = BuildNasUnetPrune(
                genotype=genotype,
                input_c=args.in_channels,
                c=args.init_channels,
                num_classes=args.nclass,
                meta_node_num=args.middle_nodes,
                layers=args.layers,
                dp=args.dropout_prob,
                use_sharing=args.use_sharing,
                double_down_channel=args.double_down_channel,
                aux=args.aux)
            args.model_path = './logs/isic2018/prune_20200313-063428_32_32_ep300_double/model_best.pth.tar'
            model.load_state_dict(
                torch.load(args.model_path, map_location='cpu')['state_dict'])

        elif model_name == 'nodouble':
            args.deepsupervision = False
            args.double_down_channel = False
            args.genotype_name = 'stage1_layer9_110epoch_final'
            genotype = eval('genotypes.%s' % args.genotype_name)
            model = BuildNasUnetPrune(
                genotype=genotype,
                input_c=args.in_channels,
                c=args.init_channels,
                num_classes=args.nclass,
                meta_node_num=args.middle_nodes,
                layers=args.layers,
                dp=args.dropout_prob,
                use_sharing=args.use_sharing,
                double_down_channel=args.double_down_channel,
                aux=args.aux)
            args.model_path = './logs/isic2018/prune_20200316-141125_nodouble_32_ep300/model_best.pth.tar'
            model.load_state_dict(
                torch.load(args.model_path, map_location='cpu')['state_dict'])

        elif model_name == 'nodouble_deep':
            args.deepsupervision = True
            args.double_down_channel = False
            args.genotype_name = 'stage1_layer9_110epoch_deep_final'
            genotype = eval('genotypes.%s' % args.genotype_name)
            model = BuildNasUnetPrune(
                genotype=genotype,
                input_c=args.in_channels,
                c=args.init_channels,
                num_classes=args.nclass,
                meta_node_num=args.middle_nodes,
                layers=args.layers,
                dp=args.dropout_prob,
                use_sharing=args.use_sharing,
                double_down_channel=args.double_down_channel,
                aux=args.aux)
            args.model_path = './logs/isic2018/prune_20200316-141242_nodouble_32_ep300_deep/model_best.pth.tar'
            model.load_state_dict(
                torch.load(args.model_path, map_location='cpu')['state_dict'])

        if model_name == "slim_dd":
            args.deepsupervision = True
            args.double_down_channel = True
            args.genotype_name = 'stage1_layer9_110epoch_double_deep_final'
            genotype = eval('genotypes.%s' % args.genotype_name)
            model = net_dd(genotype=genotype,
                           input_c=args.in_channels,
                           c=args.init_channels,
                           num_classes=args.nclass,
                           meta_node_num=args.middle_nodes,
                           layers=args.layers,
                           dp=args.dropout_prob,
                           use_sharing=args.use_sharing,
                           double_down_channel=args.double_down_channel,
                           aux=args.aux)
            args.model_path = './logs/isic2018/dd_20200319-170442_ep300/model_best.pth.tar'
            model.load_state_dict(
                torch.load(args.model_path, map_location='cpu')['state_dict'])

        elif model_name == 'slim_double':
            args.deepsupervision = False
            args.double_down_channel = True
            args.genotype_name = 'stage1_layer9_110epoch_double_final'
            genotype = eval('genotypes.%s' % args.genotype_name)
            model = net_double(genotype=genotype,
                               input_c=args.in_channels,
                               c=args.init_channels,
                               num_classes=args.nclass,
                               meta_node_num=args.middle_nodes,
                               layers=args.layers,
                               dp=args.dropout_prob,
                               use_sharing=args.use_sharing,
                               double_down_channel=args.double_down_channel,
                               aux=args.aux)
            args.model_path = './logs/isic2018/double_20200319-170621_ep300/model_best.pth.tar'
            model.load_state_dict(
                torch.load(args.model_path, map_location='cpu')['state_dict'])

        elif model_name == 'slim_nodouble':
            args.deepsupervision = False
            args.double_down_channel = False
            args.genotype_name = 'stage1_layer9_110epoch_final'
            genotype = eval('genotypes.%s' % args.genotype_name)
            model = net_nodouble(genotype=genotype,
                                 input_c=args.in_channels,
                                 c=args.init_channels,
                                 num_classes=args.nclass,
                                 meta_node_num=args.middle_nodes,
                                 layers=args.layers,
                                 dp=args.dropout_prob,
                                 use_sharing=args.use_sharing,
                                 double_down_channel=args.double_down_channel,
                                 aux=args.aux)
            args.model_path = './logs/isic2018/nodouble_20200319-210910_ep300/model_best.pth.tar'
            model.load_state_dict(
                torch.load(args.model_path, map_location='cpu')['state_dict'])

        elif model_name == 'slim_nodouble_deep':
            args.deepsupervision = True
            args.double_down_channel = False
            args.genotype_name = 'stage1_layer9_110epoch_deep_final'
            genotype = eval('genotypes.%s' % args.genotype_name)
            model = net_nodouble_deep(
                genotype=genotype,
                input_c=args.in_channels,
                c=args.init_channels,
                num_classes=args.nclass,
                meta_node_num=args.middle_nodes,
                layers=args.layers,
                dp=args.dropout_prob,
                use_sharing=args.use_sharing,
                double_down_channel=args.double_down_channel,
                aux=args.aux)
            args.model_path = './logs/isic2018/nodouble_deep_20200319-210600_ep300/model_best.pth.tar'
            model.load_state_dict(
                torch.load(args.model_path, map_location='cpu')['state_dict'])

        elif model_name == 'slim_nodouble_deep_init32':
            args.deepsupervision = True
            args.double_down_channel = False
            args.genotype_name = 'stage1_layer9_110epoch_deep_final'
            genotype = eval('genotypes.%s' % args.genotype_name)
            model = net_nodouble_deep(
                genotype=genotype,
                input_c=args.in_channels,
                c=32,
                num_classes=args.nclass,
                meta_node_num=args.middle_nodes,
                layers=args.layers,
                dp=args.dropout_prob,
                use_sharing=args.use_sharing,
                double_down_channel=args.double_down_channel,
                aux=args.aux)
            args.model_path = './logs/isic2018/nodouble_deep_ep300_init32/model_best.pth.tar'
            model.load_state_dict(
                torch.load(args.model_path, map_location='cpu')['state_dict'])

        #################### init logger ###################################
        log_dir = './eval/{}/{}'.format(args.dataset, model_name)
        ##################### init model ########################################
        logger = get_logger(log_dir)
        print('RUNDIR: {}'.format(log_dir))
        logger.info('{}-Eval'.format(model_name))
        # setting
        args.save_path = log_dir
        args.save_images = os.path.join(args.save_path, "images")
        if not os.path.exists(args.save_images):
            os.mkdir(args.save_images)
        ##################### init device #################################
        if args.manualSeed is None:
            args.manualSeed = random.randint(1, 10000)
        np.random.seed(args.manualSeed)
        torch.manual_seed(args.manualSeed)
        args.use_cuda = args.gpus > 0 and torch.cuda.is_available()
        args.device = torch.device('cuda' if args.use_cuda else 'cpu')
        if args.use_cuda:
            torch.cuda.manual_seed(args.manualSeed)
            cudnn.benchmark = True
        ####################### init dataset ###########################################
        # sorted vaild datasets
        val_loader = get_dataloder(args, split_flag="valid")
        setting = {k: v for k, v in args._get_kwargs()}
        logger.info(setting)
        logger.info(genotype)
        logger.info('param size = %fMB', calc_parameters_count(model))

        # init loss
        if args.loss == 'bce':
            criterion = nn.BCELoss()
        elif args.loss == 'bcelog':
            criterion = nn.BCEWithLogitsLoss()
        elif args.loss == "dice":
            criterion = DiceLoss()
        elif args.loss == "softdice":
            criterion = SoftDiceLoss()
        elif args.loss == 'bcedice':
            criterion = BCEDiceLoss()
        else:
            criterion = nn.CrossEntropyLoss()
        if args.use_cuda:
            logger.info("load model and criterion to gpu !")
        model = model.to(args.device)
        criterion = criterion.to(args.device)
Example #12
def main(args):
    #################### init logger ###################################
    args.model_list=["unet","unet++",'attention_unet_v1','multires_unet','r2unet_t3']


    for model_name in args.model_list:
        if model_name=='unet':
            args.model='unet'
            model_weight_path='./logs/unet_ep1600/cvc/20200312-143050/model_best.pth.tar'
            model=get_models(args)
            model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict'])
        elif model_name=='unet++':
            args.model='unet++'
            args.deepsupervision=False
            model_weight_path='./logs/unet++_ep1600/cvc/20200312-143358/model_best.pth.tar'
            model=get_models(args)
            model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict'])

        elif model_name == 'attention_unet_v1':
            args.model = 'attention_unet_v1'
            model_weight_path = './logs/attention_unet_v1_ep1600/cvc/20200312-143413/model_best.pth.tar'
            model = get_models(args)
            model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict'])

        elif model_name == 'multires_unet':
            args.model = 'multires_unet'
            model_weight_path = './logs/multires_unet_ep1600_t2/20200322-194117/model_best.pth.tar'
            model = get_models(args)
            model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict'])

        # change bn relu order
        elif model_name == 'multires_unet_align':
            args.model = 'multires_unet'
            model_weight_path = './logs/multires_unet_ep1600_chbnrelu/20200327-184457/model_best.pth.tar'
            model = get_models(args)
            model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict'])


        elif model_name == 'r2unet_t3':
            args.model = 'r2unet'
            args.time_step=3
            model_weight_path = './logs/r2unet_ep1600_t2/20200324-032815/model_best.pth.tar'
            model = get_models(args)
            model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict'])


        elif model_name == 'unet_ep800dice':
            args.model = 'unet'
            model_weight_path = './logs/unet_ep800_bcedice/cvc/20200315-043021/model_best.pth.tar'
            model = get_models(args)
            model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict'])

        elif model_name=='unet++_nodeep_ep800dice':
            args.model='unet++'
            args.deepsupervision=False
            model_weight_path='./logs/unet++_ep800_bcedice/cvc/20200315-043214/model_best.pth.tar'
            model=get_models(args)
            model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict'])
        elif model_name == 'unet++_deep_ep800dice':
            args.model = 'unet++'
            args.deepsupervision = True
            model_weight_path = './logs/unet++_deep_ep800_bcedice/cvc/20200315-043134/model_best.pth.tar'
            model = get_models(args)
            model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict'])

        elif model_name == 'attention_unet_v1_ep800dice':
            args.model = 'attention_unet_v1'
            args.deepsupervision=False
            model_weight_path = './logs/attention_unet_v1_ep800_bcedice/cvc/20200315-043300/model_best.pth.tar'
            model = get_models(args)
            model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict'])

        elif model_name == 'multires_unet_ep800dice':
            args.model = 'multires_unet'
            args.deepsupervision=False
            model_weight_path = './logs/multires_unet_ep800_bcedice/cvc/20200312-173031/model_best.pth.tar'
            model = get_models(args)
            model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict'])

        else:
            raise NotImplementedError()


        assert os.path.exists(args.save)
        args.model_save_path=os.path.join(args.save,model_name)
        logger = get_logger(args.model_save_path)
        args.save_images= os.path.join(args.model_save_path,"images")
        if not os.path.exists(args.save_images):
            os.mkdir(args.save_images)
        if args.manualSeed is None:
            args.manualSeed = random.randint(1, 10000)
        np.random.seed(args.manualSeed)
        torch.manual_seed(args.manualSeed)
        args.use_cuda = args.gpus > 0 and torch.cuda.is_available()
        args.device = torch.device('cuda' if args.use_cuda else 'cpu')
        if args.use_cuda:
            torch.cuda.manual_seed(args.manualSeed)
            cudnn.benchmark = True
        val_loader = get_dataloder(args, split_flag="valid")
        setting = {k: v for k, v in args._get_kwargs()}
        logger.info(setting)
        logger.info('param size = %fMB', calc_parameters_count(model))


        # init loss
        if args.loss == 'bce':
            criterion = nn.BCELoss()
        elif args.loss == 'bcelog':
            criterion = nn.BCEWithLogitsLoss()
        elif args.loss == "dice":
            criterion = DiceLoss()
        elif args.loss == "softdice":
            criterion = SoftDiceLoss()
        elif args.loss == 'bcedice':
            criterion = BCEDiceLoss()
        else:
            criterion = nn.CrossEntropyLoss()
        if args.use_cuda:
            logger.info("load model and criterion to gpu !")

        model = model.to(args.device)
        criterion = criterion.to(args.device)
        infer(args, model, criterion, val_loader,logger,args.save_images)
Example #13
def main(args):

    #################### init logger ###################################
    log_dir = './logs/{}/{}_{}_{}'.format(args.dataset, args.model,
                                          time.strftime('%Y%m%d-%H%M%S'), args.note)
    logger = get_logger(log_dir)
    print('RUNDIR: {}'.format(log_dir))
    logger.info('{}-Train'.format(args.model))
    # setting
    args.save_path = log_dir
    args.save_tbx_log = args.save_path + '/tbx_log'
    writer = SummaryWriter(args.save_tbx_log)
    ##################### init device #################################
    if args.manualSeed is None:
        args.manualSeed = random.randint(1, 10000)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    args.use_cuda = args.gpus > 0 and torch.cuda.is_available()
    args.device = torch.device('cuda' if args.use_cuda else 'cpu')
    if args.use_cuda:
        torch.cuda.manual_seed(args.manualSeed)
        cudnn.benchmark = True
    ####################### init dataset ###########################################
    train_loader = get_dataloder(args, split_flag="train")
    val_loader = get_dataloder(args, split_flag="valid")
    ######################## init model ############################################
    # model
    # get the network parameters
    if args.model == "alpha_double_deep":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'stage1_layer9_110epoch_double_deep_final'
        args.alphas_model = './search_exp/Nas_Search_Unet/isic2018/deepsupervision/stage_1_model/checkpoint.pth.tar'
        model_alphas = torch.load(
            args.alphas_model,
            map_location=args.device)['alphas_dict']['alphas_network']
        model_alphas.requires_grad = False
        model_alphas = F.softmax(model_alphas, dim=-1)
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnet(genotype=genotype,
                             input_c=args.in_channels,
                             c=args.init_channels,
                             num_classes=args.nclass,
                             meta_node_num=args.middle_nodes,
                             layers=args.layers,
                             dp=args.dropout_prob,
                             use_sharing=args.use_sharing,
                             double_down_channel=args.double_down_channel,
                             aux=args.aux)

    elif args.model == "alpha_double":
        args.deepsupervision = False
        args.double_down_channel = True
        args.genotype_name = 'stage1_layer9_110epoch_double_final'
        args.alphas_model = './search_exp/Nas_Search_Unet/isic2018/nodeepsupervision/stage_1_model/checkpoint.pth.tar'
        model_alphas = torch.load(
            args.alphas_model,
            map_location=args.device)['alphas_dict']['alphas_network']
        model_alphas.requires_grad = False
        model_alphas = F.softmax(model_alphas, dim=-1)
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnet(genotype=genotype,
                             input_c=args.in_channels,
                             c=args.init_channels,
                             num_classes=args.nclass,
                             meta_node_num=args.middle_nodes,
                             layers=args.layers,
                             dp=args.dropout_prob,
                             use_sharing=args.use_sharing,
                             double_down_channel=args.double_down_channel,
                             aux=args.aux)

    elif args.model == "alpha_nodouble":
        args.deepsupervision = False
        args.double_down_channel = False
        args.genotype_name = 'stage1_layer9_110epoch_final'
        args.alphas_model = './search_exp/Nas_Search_Unet/isic2018/nodouble/stage_1_model/checkpoint.pth.tar'
        model_alphas = torch.load(
            args.alphas_model,
            map_location=args.device)['alphas_dict']['alphas_network']
        model_alphas.requires_grad = False
        model_alphas = F.softmax(model_alphas, dim=-1)
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnet(genotype=genotype,
                             input_c=args.in_channels,
                             c=args.init_channels,
                             num_classes=args.nclass,
                             meta_node_num=args.middle_nodes,
                             layers=args.layers,
                             dp=args.dropout_prob,
                             use_sharing=args.use_sharing,
                             double_down_channel=args.double_down_channel,
                             aux=args.aux)

    elif args.model == "alpha_nodouble_deep":
        args.deepsupervision = True
        args.double_down_channel = False
        args.genotype_name = 'stage1_layer9_110epoch_deep_final'
        args.alphas_model = './search_exp/Nas_Search_Unet/isic2018/nodouble_deep/stage_1_model/checkpoint.pth.tar'
        model_alphas = torch.load(
            args.alphas_model,
            map_location=args.device)['alphas_dict']['alphas_network']
        model_alphas.requires_grad = False
        model_alphas = F.softmax(model_alphas, dim=-1)
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnet(genotype=genotype,
                             input_c=args.in_channels,
                             c=args.init_channels,
                             num_classes=args.nclass,
                             meta_node_num=args.middle_nodes,
                             layers=args.layers,
                             dp=args.dropout_prob,
                             use_sharing=args.use_sharing,
                             double_down_channel=args.double_down_channel,
                             aux=args.aux)

    elif args.model == "double_deep":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'stage1_layer9_110epoch_double_deep_final'
        model_alphas = None
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    elif args.model == "double":
        args.deepsupervision = False
        args.double_down_channel = True
        args.genotype_name = 'stage1_layer9_110epoch_double_final'
        model_alphas = None
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    elif args.model == "nodouble":
        args.deepsupervision = False
        args.double_down_channel = False
        args.genotype_name = 'stage1_layer9_110epoch_final'
        model_alphas = None
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    elif args.model == "nodouble_deep":
        args.deepsupervision = True
        args.double_down_channel = False
        args.genotype_name = 'stage1_layer9_110epoch_deep_final'
        model_alphas = None
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    elif args.model == "alpha1_stage1_double_deep_ep80":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'alpha1_stage1_double_deep_ep80'
        model_alphas = None
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    elif args.model == "alpha0_stage1_double_deep_ep80":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'alpha0_stage1_double_deep_ep80'
        model_alphas = None
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    elif args.model == "alpha0_5_stage1_double_deep_ep80":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'alpha0_5_stage1_double_deep_ep80'
        model_alphas = None
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    elif args.model == "alpha0_5_stage1_double_nodeep_ep80":
        args.deepsupervision = False
        args.double_down_channel = True
        args.genotype_name = 'alpha0_5_stage1_double_nodeep_ep80'
        model_alphas = None
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    elif args.model == "alpha0_5_stage1_nodouble_deep_ep80":
        args.deepsupervision = True
        args.double_down_channel = False
        args.genotype_name = 'alpha0_5_stage1_nodouble_deep_ep80'
        model_alphas = None
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    elif args.model == "alpha0_5_stage1_nodouble_nodeep_ep80":
        args.deepsupervision = False
        args.double_down_channel = False
        args.genotype_name = 'alpha0_5_stage1_nodouble_nodeep_ep80'
        model_alphas = None
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    # CVC transfer
    elif args.model == "layer7_double_deep":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'layer7_double_deep'
        model_alphas = None
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    # CHAOS transfer
    elif args.model == "stage0_double_deep_ep80_newim":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'stage0_double_deep_ep80_newim'
        model_alphas = None
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)
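
    # --- Editor's note: every branch above repeats the same
    # BuildNasUnetPrune(...) call and only varies genotype_name,
    # deepsupervision and double_down_channel. A minimal table-driven sketch
    # (MODEL_TABLE and build_pruned_model are hypothetical names introduced
    # here; getattr replaces the eval() lookup):
    #
    #     MODEL_TABLE = {
    #         # name: (genotype_name, deepsupervision, double_down_channel)
    #         "nodouble_deep": ('stage1_layer9_110epoch_deep_final', True, False),
    #         "alpha1_stage1_double_deep_ep80":
    #             ('alpha1_stage1_double_deep_ep80', True, True),
    #     }
    #
    #     def build_pruned_model(args):
    #         name, deep, double = MODEL_TABLE[args.model]
    #         args.deepsupervision, args.double_down_channel = deep, double
    #         genotype = getattr(genotypes, name)
    #         return BuildNasUnetPrune(genotype=genotype,
    #                                  input_c=args.in_channels,
    #                                  c=args.init_channels,
    #                                  num_classes=args.nclass,
    #                                  meta_node_num=args.middle_nodes,
    #                                  layers=args.layers,
    #                                  dp=args.dropout_prob,
    #                                  use_sharing=args.use_sharing,
    #                                  double_down_channel=args.double_down_channel,
    #                                  aux=args.aux)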

    if torch.cuda.device_count() > 1 and args.use_cuda:
        logger.info('use: %d gpus', torch.cuda.device_count())
        model = nn.DataParallel(model)

    setting = {k: v for k, v in args._get_kwargs()}
    logger.info(setting)
    logger.info(genotype)
    logger.info(model_alphas)
    flop, param = get_model_complexity_info(model, (3, 256, 256),
                                            as_strings=True,
                                            print_per_layer_stat=False)
    print("GFLOPs: {}".format(flop))
    print("Params: {}".format(param))
    # init loss
    if args.loss == 'bce':
        criterion = nn.BCELoss()
    elif args.loss == 'bcelog':
        criterion = nn.BCEWithLogitsLoss()
    elif args.loss == "dice":
        criterion = DiceLoss()
    elif args.loss == "softdice":
        criterion = SoftDiceLoss()
    elif args.loss == 'bcedice':
        criterion = BCEDiceLoss()
    else:
        criterion = nn.CrossEntropyLoss()
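    # Editor's sketch: the if/elif chain above is equivalent to a lookup table
    # (loss_factory is a hypothetical name introduced here):
    #
    #     loss_factory = {'bce': nn.BCELoss, 'bcelog': nn.BCEWithLogitsLoss,
    #                     'dice': DiceLoss, 'softdice': SoftDiceLoss,
    #                     'bcedice': BCEDiceLoss}
    #     criterion = loss_factory.get(args.loss, nn.CrossEntropyLoss)()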
    if args.use_cuda:
        logger.info("load model and criterion to gpu !")
    model = model.to(args.device)
    criterion = criterion.to(args.device)
    # init optimizer
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                weight_decay=args.weight_decay,
                                momentum=args.momentum)
    # init schedulers  Steplr
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.epoch)
    #scheduler=torch.optim.lr_scheduler.StepLR(optimizer=optimizer,step_size=30,gamma=0.1,last_epoch=-1)
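    # CosineAnnealingLR with T_max=args.epoch anneals each group's lr from its
    # initial value lr_0 toward eta_min (default 0):
    #     lr_t = eta_min + (lr_0 - eta_min) * (1 + cos(pi * t / T_max)) / 2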

    ############################### check resume #########################
    start_epoch = 0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            logger.info(
                "Loading model and optimizer from checkpoint '{}'".format(
                    args.resume))
            checkpoint = torch.load(args.resume, map_location=args.device)
            start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            model.load_state_dict(checkpoint['state_dict'])
            scheduler.load_state_dict(checkpoint['scheduler'])
        else:
            raise FileNotFoundError("No checkpoint found at '{}'".format(
                args.resume))

    #################################### train and val ########################
    max_value = 0
    for epoch in range(start_epoch, args.epoch):
        # lr=adjust_learning_rate(args,optimizer,epoch)
        scheduler.step()
        # train
        if args.deepsupervision:
            mean_loss, value1, value2 = train(args, model_alphas, model,
                                              criterion, train_loader,
                                              optimizer)
            mr, ms, mp, mf, mjc, md, macc = value1
            mmr, mms, mmp, mmf, mmjc, mmd, mmacc = value2
            logger.info(
                "Epoch:{} Train_Loss:{:.3f} Acc:{:.3f} Dice:{:.3f} Jc:{:.3f}".
                format(epoch, mean_loss, macc, md, mjc))
            writer.add_scalar('Train/dDice', mmd, epoch)
        else:
            mean_loss, value1 = train(args, model_alphas, model, criterion,
                                      train_loader, optimizer)
            mr, ms, mp, mf, mjc, md, macc = value1
            logger.info(
                "Epoch:{} Train_Loss:{:.3f} Acc:{:.3f} Dice:{:.3f} Jc:{:.3f}".
                format(epoch, mean_loss, macc, md, mjc))
        # write
        writer.add_scalar('Train/Loss', mean_loss, epoch)

        # val
        if args.deepsupervision:
            vmean_loss, valuev1, valuev2 = infer(args, model_alphas, model,
                                                 criterion, val_loader)
            vmr, vms, vmp, vmf, vmjc, vmd, vmacc = valuev1
            logger.info(
                "Epoch:{} Val_Loss:{:.3f} Acc:{:.3f} Dice:{:.3f} Jc:{:.3f}".
                format(epoch, vmean_loss, vmacc, vmd, vmjc))

        else:
            vmean_loss, valuev1 = infer(args, model_alphas, model, criterion,
                                        val_loader)
            vmr, vms, vmp, vmf, vmjc, vmd, vmacc = valuev1
            logger.info(
                "Epoch:{} Val_Loss:{:.3f} Acc:{:.3f} Dice:{:.3f} Jc:{:.3f}".
                format(epoch, vmean_loss, vmacc, vmd, vmjc))

        is_best = vmjc >= max_value
        max_value = max(max_value, vmjc)
        writer.add_scalar('Val/Loss', vmean_loss, epoch)

        state = {
            'epoch': epoch,
            'optimizer': optimizer.state_dict(),
            'state_dict': model.state_dict(),
            'scheduler': scheduler.state_dict(),
        }
        logger.info("epoch:{} best:{} max_value:{}".format(
            epoch, is_best, max_value))
        # always save the latest checkpoint; keep an extra copy when best
        torch.save(state, os.path.join(args.save_path,
                                       "checkpoint.pth.tar"))
        if is_best:
            torch.save(state, os.path.join(args.save_path,
                                           "model_best.pth.tar"))

    writer.close()
Example #14
def main(args):

    data_path = '/home/birgit/MA/Code/torchmeta/gitlab/data'
    with open(args.config, 'r') as f:
        config = json.load(f)

    if args.folder is not None:
        config['folder'] = args.folder
    if args.num_steps > 0:
        config['num_steps'] = args.num_steps
    if args.num_batches > 0:
        config['num_batches'] = args.num_batches

    device = torch.device(
        'cuda' if args.use_cuda and torch.cuda.is_available() else 'cpu')

    loss_function = DiceLoss()

    dataset = 'pascal5i'
    fold = config['fold']

    steps = config['num_adaption_steps']

    padding = 1

    if 'feature_scale' in config:
        model = Unet(feature_scale=config['feature_scale'], padding=padding)
    else:
        model = Unet(feature_scale=4, padding=padding)

    # get datasets and load into meta learning format
    meta_train_dataset, meta_val_dataset, meta_test_dataset = get_datasets(
        dataset,
        data_path,
        config['num_ways'],
        config['num_shots'],
        config['num_shots_test'],
        fold=fold,
        download=False,
        augment=False)

    meta_val_dataloader = BatchMetaDataLoader(meta_val_dataset,
                                              batch_size=config['batch_size'],
                                              shuffle=True,
                                              num_workers=args.num_workers,
                                              pin_memory=True)

    print('num shots = ', config['num_shots'])
    print(f'Using device: {device}')

    with open(config['model_path'], 'rb') as f:
        model.load_state_dict(torch.load(f, map_location=device))

    metalearner = ModelAgnosticMetaLearning(model,
                                            first_order=config['first_order'],
                                            num_adaptation_steps=steps,
                                            step_size=config['step_size'],
                                            loss_function=loss_function,
                                            device=device)

    results = metalearner.evaluate(meta_val_dataloader,
                                   max_batches=config['num_batches'],
                                   verbose=args.verbose,
                                   desc='Test',
                                   is_test=True)

    if dataset == 'pascal5i':
        labels = [
            'aeroplane', 'bike', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
            'chair', 'cow', 'dining table', 'dog', 'horse', 'motorbike',
            'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
        ]
        accuracies = [
            value for _, value in results['mean_acc_per_label'].items()
        ]
        ious = [value for _, value in results['mean_iou_per_label'].items()]

        val_ious = [x for x in ious if x > 0.0]
        val_accs = [x for x in accuracies if x > 0.0]
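
        # Editor's addition (assumed convenience output): the filtered lists
        # above drop labels that never appear in this fold, so their means
        # give the fold-level summary.
        print('mean IoU over seen labels:', np.mean(val_ious))
        print('mean acc over seen labels:', np.mean(val_accs))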

        y_pos = np.arange(len(labels))

        fig, (ax1, ax2) = plt.subplots(1, 2)

        ax1.barh(y_pos, accuracies, align='center', alpha=0.5)
        ax1.set_yticks(y_pos)
        ax1.set_yticklabels(labels)
        ax1.set_xlabel('acc')
        ax1.set_xlim(0, 1)
        ax1.set_title('Accuracies per label')

        ax2.barh(y_pos, ious, align='center', alpha=0.5)
        ax2.set_yticks(y_pos)
        ax2.set_yticklabels(labels)
        ax2.set_xlabel('iou')
        ax2.set_xlim(0, 1)
        ax2.set_title('IoU scores per label')
        plt.grid(True)

        plt.show()

    # Save results
    dirname = os.path.dirname(config['model_path'])
    with open(os.path.join(dirname, 'test_results.json'), 'w') as f:
        json.dump(results, f)
Example #15
def main(args):
    #################### init logger ###################################
    # args.model_list=["unet","unet++_deep","unet++_nodeep",'attention_unet_v1','multires_unet','r2unet_t3',
    #                  'unet_ep800dice','unet++_deep_ep800dice','unet++_nodeep_ep800dice','attention_unet_v1_ep800dice','multires_unet_ep800dice'
    #                  ]
    args.model_list = ['unet', 'unet++', "attention_unet", "multires_unet"]

    for model_name in args.model_list:
        if model_name == 'unet':
            args.model = 'unet'
            model_weight_path = './logs/chaos/unet_ep150_v2/20200403-134703/checkpoint.pth.tar'
            model = get_models(args)
            model.load_state_dict(
                torch.load(model_weight_path,
                           map_location='cpu')['state_dict'])
        elif model_name == 'unet++':
            args.model = 'unet++'
            args.deepsupervision = False
            model_weight_path = './logs/chaos/unet++_ep150_v2/20200403-135401/checkpoint.pth.tar'
            model = get_models(args)
            model.load_state_dict(
                torch.load(model_weight_path,
                           map_location='cpu')['state_dict'])

        # elif model_name == 'unet++_deep':
        #     args.model = 'unet++'
        #     args.deepsupervision = True
        #     model_weight_path = './logs/unet++_deep_ep1600/cvc/20200312-143345/model_best.pth.tar'
        #     model = get_models(args)
        #     model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict'])

        elif model_name == 'attention_unet':
            args.model = 'attention_unet_v1'
            args.deepsupervision = False
            model_weight_path = './logs/chaos/attention_unet_v1_ep150_v2/20200403-135445/checkpoint.pth.tar'
            model = get_models(args)
            model.load_state_dict(
                torch.load(model_weight_path,
                           map_location='cpu')['state_dict'])

        elif model_name == 'multires_unet':
            args.model = 'multires_unet'
            args.deepsupervision = False
            model_weight_path = './logs/chaos/multires_unet_ep150_v2/20200403-135549/checkpoint.pth.tar'
            model = get_models(args)
            model.load_state_dict(
                torch.load(model_weight_path,
                           map_location='cpu')['state_dict'])

        else:
            raise NotImplementedError()
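
        # Editor's sketch (hypothetical helper): each branch above repeats the
        # same CPU-mapped checkpoint load, which could be factored as
        #
        #     def load_weights(model, path):
        #         model.load_state_dict(
        #             torch.load(path, map_location='cpu')['state_dict'])
        #         return model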

        assert os.path.exists(args.save)
        args.model_save_path = os.path.join(args.save, model_name)
        logger = get_logger(args.model_save_path)
        args.save_images = os.path.join(args.model_save_path, "images")
        if not os.path.exists(args.save_images):
            os.mkdir(args.save_images)
        if args.manualSeed is None:
            args.manualSeed = random.randint(1, 10000)
        np.random.seed(args.manualSeed)
        torch.manual_seed(args.manualSeed)
        args.use_cuda = args.gpus > 0 and torch.cuda.is_available()
        args.device = torch.device('cuda' if args.use_cuda else 'cpu')
        if args.use_cuda:
            torch.cuda.manual_seed(args.manualSeed)
            cudnn.benchmark = True
        val_loader = get_dataloder(args, split_flag="valid")
        setting = {k: v for k, v in args._get_kwargs()}
        logger.info(setting)
        logger.info('param size = %fMB', calc_parameters_count(model))

        # init loss
        if args.loss == 'bce':
            criterion = nn.BCELoss()
        elif args.loss == 'bcelog':
            criterion = nn.BCEWithLogitsLoss()
        elif args.loss == "dice":
            criterion = DiceLoss()
        elif args.loss == "softdice":
            criterion = SoftDiceLoss()
        elif args.loss == 'bcedice':
            criterion = BCEDiceLoss()
        else:
            criterion = nn.CrossEntropyLoss()
        if args.use_cuda:
            logger.info("load model and criterion to gpu !")

        model = model.to(args.device)
        criterion = criterion.to(args.device)
        infer(args, model, criterion, val_loader, logger, args.save_images)
Example #16
def main(args):
    #################### init logger ###################################
    #args.model_list=["unet","unet++_deep",'attention_unet_v1','multires_unet', 'r2unet_t3']

    args.model_list = [
        "unet", "unet++_deep", 'unet++_nodeep', "attention_unet_v1",
        "multires_unet", "r2unet"
    ]

    for model_name in args.model_list:
        # if model_name=='unet':
        #     args.model='unet'
        #     model_weight_path='./logs/isic/logs_coslr/unet/isic2018/20200229-035150/checkpoint.pth.tar'
        #     model=get_models(args)
        #     model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict'])
        # elif model_name=='unet++_deep':
        #     args.model='unet++'
        #     args.deepsupervision=True
        #     model_weight_path='./logs/isic/logs_coslr/unet++/isic2018/20200229-035514/checkpoint.pth.tar'
        #     model=get_models(args)
        #     model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict'])
        # elif model_name == 'unet++_nodeep':
        #     args.model = 'unet++'
        #     args.deepsupervision = False
        #     model_weight_path = '/checkpoint.pth.tar'
        #     model = get_models(args)
        #     model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict'])
        # elif model_name == 'attention_unet_v1':
        #     args.model = 'attention_unet_v1'
        #     model_weight_path = './logs/isic/logs_coslr/attention_unet_v1/isic2018/20200302-190718/checkpoint.pth.tar'
        #     args.deepsupervision=False
        #     model = get_models(args)
        #     model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict'])
        #
        # elif model_name == 'multires_unet':
        #     args.model = 'multires_unet'
        #     model_weight_path = './logs/isic/logs_coslr/multires_unet/isic2018/20200229-035734/checkpoint.pth.tar'
        #     model = get_models(args)
        #     model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict'])
        #
        # elif model_name == 'r2unet_t3':
        #     args.model = 'r2unet'
        #     args.time_step=3
        #     model_weight_path = './logs/isic/logs_coslr/r2unet/isic2018/20200302-190808/checkpoint.pth.tar'
        #     model = get_models(args)
        #     model.load_state_dict(torch.load(model_weight_path, map_location='cpu')['state_dict'])

        # ep300   baseline
        if model_name == 'unet':
            args.model = 'unet'
            model_weight_path = './logs/isic2018/unet_ep300/20200402-135108/model_best.pth.tar'
            model = get_models(args)
            model.load_state_dict(
                torch.load(model_weight_path,
                           map_location='cpu')['state_dict'])
        elif model_name == 'unet++_deep':
            args.model = 'unet++'
            args.deepsupervision = True
            model_weight_path = './logs/isic2018/unet++_ep300_deep/20200402-135243/model_best.pth.tar'
            model = get_models(args)
            model.load_state_dict(
                torch.load(model_weight_path,
                           map_location='cpu')['state_dict'])
        elif model_name == 'unet++_nodeep':
            args.model = 'unet++'
            args.deepsupervision = False
            model_weight_path = './logs/isic2018/unet++_ep300/20200402-135317/model_best.pth.tar'
            model = get_models(args)
            model.load_state_dict(
                torch.load(model_weight_path,
                           map_location='cpu')['state_dict'])

        elif model_name == 'attention_unet_v1':
            args.model = 'attention_unet_v1'
            args.deepsupervision = False
            model_weight_path = './logs/isic2018/attention_unet_v1_ep300/20200413-160808/model_best.pth.tar'
            model = get_models(args)
            model.load_state_dict(
                torch.load(model_weight_path,
                           map_location='cpu')['state_dict'])

        elif model_name == 'multires_unet':
            args.model = 'multires_unet'
            args.deepsupervision = False
            # NOTE: this reuses the attention_unet_v1 checkpoint path above
            model_weight_path = './logs/isic2018/attention_unet_v1_ep300/20200413-160808/model_best.pth.tar'
            model = get_models(args)
            model.load_state_dict(
                torch.load(model_weight_path,
                           map_location='cpu')['state_dict'])
        elif model_name == 'r2unet':
            args.model = 'r2unet'
            args.deepsupervision = False
            # NOTE: this reuses the attention_unet_v1 checkpoint path above
            model_weight_path = './logs/isic2018/attention_unet_v1_ep300/20200413-160808/model_best.pth.tar'
            model = get_models(args)
            model.load_state_dict(
                torch.load(model_weight_path,
                           map_location='cpu')['state_dict'])

        else:
            raise NotImplementedError()

        assert os.path.exists(args.save)
        args.model_save_path = os.path.join(args.save, model_name)
        logger = get_logger(args.model_save_path)
        args.save_images = os.path.join(args.model_save_path, "images")
        if not os.path.exists(args.save_images):
            os.mkdir(args.save_images)

        if args.manualSeed is None:
            args.manualSeed = random.randint(1, 10000)
        np.random.seed(args.manualSeed)
        torch.manual_seed(args.manualSeed)
        args.use_cuda = args.gpus > 0 and torch.cuda.is_available()
        args.device = torch.device('cuda' if args.use_cuda else 'cpu')
        if args.use_cuda:
            torch.cuda.manual_seed(args.manualSeed)
            cudnn.benchmark = True

        val_loader = get_dataloder(args, split_flag="valid")

        setting = {k: v for k, v in args._get_kwargs()}
        logger.info(setting)
        logger.info('param size = %fMB', calc_parameters_count(model))

        # init loss
        if args.loss == 'bce':
            criterion = nn.BCELoss()
        elif args.loss == 'bcelog':
            criterion = nn.BCEWithLogitsLoss()
        elif args.loss == "dice":
            criterion = DiceLoss()
        elif args.loss == "softdice":
            criterion = SoftDiceLoss()
        elif args.loss == 'bcedice':
            criterion = BCEDiceLoss()
        else:
            criterion = nn.CrossEntropyLoss()
        if args.use_cuda:
            logger.info("load model and criterion to gpu !")

        model = model.to(args.device)
        criterion = criterion.to(args.device)
        infer(args, model, criterion, val_loader, logger, args.save_images)
Example #17
def main(args):
    #################### init logger ###################################
    log_dir = './logs/{}/{}_{}_{}'.format(
        args.dataset, args.model, args.note, time.strftime('%Y%m%d-%H%M%S'))

    logger = get_logger(log_dir)
    print('RUNDIR: {}'.format(log_dir))
    logger.info('{}-Train'.format(args.model))
    # setting
    args.save_path = log_dir
    args.save_tbx_log = args.save_path + '/tbx_log'
    writer = SummaryWriter(args.save_tbx_log)
    ##################### init device #################################
    if args.manualSeed is None:
        args.manualSeed = random.randint(1, 10000)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    args.use_cuda = args.gpus > 0 and torch.cuda.is_available()
    args.device = torch.device('cuda' if args.use_cuda else 'cpu')
    if args.use_cuda:
        torch.cuda.manual_seed(args.manualSeed)
        cudnn.benchmark = True
    ####################### init dataset ###########################################
    train_loader = get_dataloder(args, split_flag="train")
    val_loader = get_dataloder(args, split_flag="valid")

    ############init model ###########################
    if  args.model == "layer7_double_deep":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'layer7_double_deep'
        model_alphas = None
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(
            genotype=genotype,
            input_c=args.in_channels,
            c=args.init_channels,
            num_classes=args.nclass,
            meta_node_num=args.middle_nodes,
            layers=7,
            dp=args.dropout_prob,
            use_sharing=args.use_sharing,
            double_down_channel=args.double_down_channel,
            aux=args.aux
        )



    elif args.model == "stage1_double_deep":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'stage1_double_deep'
        model_alphas = None
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(
            genotype=genotype,
            input_c=args.in_channels,
            c=args.init_channels,
            num_classes=args.nclass,
            meta_node_num=args.middle_nodes,
            layers=args.layers,
            dp=args.dropout_prob,
            use_sharing=args.use_sharing,
            double_down_channel=args.double_down_channel,
            aux=args.aux
        )

    elif args.model == "stage1_nodouble_deep":
        args.deepsupervision = True
        args.double_down_channel = False
        args.genotype_name = 'stage1_deep'
        model_alphas = None
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(
            genotype=genotype,
            input_c=args.in_channels,
            c=args.init_channels,
            num_classes=args.nclass,
            meta_node_num=args.middle_nodes,
            layers=args.layers,
            dp=args.dropout_prob,
            use_sharing=args.use_sharing,
            double_down_channel=args.double_down_channel,
            aux=args.aux
        )

    elif args.model == "stage1_nodouble_deep_slim":
        args.deepsupervision = True
        args.double_down_channel = False
        args.genotype_name = 'stage1_deep'
        model_alphas = None
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPruneSlim(
            genotype=genotype,
            input_c=args.in_channels,
            c=args.init_channels,
            num_classes=args.nclass,
            meta_node_num=args.middle_nodes,
            layers=args.layers,
            dp=args.dropout_prob,
            use_sharing=args.use_sharing,
            double_down_channel=args.double_down_channel,
            aux=args.aux
        )


    elif args.model == "alpha1_stage1_double_deep_ep80":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'alpha1_stage1_double_deep_ep80'
        model_alphas = None
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(
            genotype=genotype,
            input_c=args.in_channels,
            c=args.init_channels,
            num_classes=args.nclass,
            meta_node_num=args.middle_nodes,
            layers=args.layers,
            dp=args.dropout_prob,
            use_sharing=args.use_sharing,
            double_down_channel=args.double_down_channel,
            aux=args.aux
        )

    elif args.model == "alpha0_stage1_double_deep_ep80":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'alpha0_stage1_double_deep_ep80'
        model_alphas = None
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(
            genotype=genotype,
            input_c=args.in_channels,
            c=args.init_channels,
            num_classes=args.nclass,
            meta_node_num=args.middle_nodes,
            layers=args.layers,
            dp=args.dropout_prob,
            use_sharing=args.use_sharing,
            double_down_channel=args.double_down_channel,
            aux=args.aux
        )

    # ISIC transfer
    elif args.model == "stage1_layer9_110epoch_double_deep_final":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'stage1_layer9_110epoch_double_deep_final'
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(
            genotype=genotype,
            input_c=args.in_channels,
            c=args.init_channels,
            num_classes=args.nclass,
            meta_node_num=args.middle_nodes,
            layers=args.layers,
            dp=args.dropout_prob,
            use_sharing=args.use_sharing,
            double_down_channel=args.double_down_channel,
            aux=args.aux
        )


    # keep just the normal cell
    elif args.model == "dd_normal":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'alpha0_5_stage1_double_deep_ep80'
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPruneNormal(
            genotype=genotype,
            input_c=args.in_channels,
            c=args.init_channels,
            num_classes=args.nclass,
            meta_node_num=args.middle_nodes,
            layers=args.layers,
            dp=args.dropout_prob,
            use_sharing=args.use_sharing,
            double_down_channel=args.double_down_channel,
            aux=args.aux
        )

    # normal+down
    elif args.model == "dd_normaldown":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'alpha0_5_stage1_double_deep_ep80'
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPruneNormalDown(
            genotype=genotype,
            input_c=args.in_channels,
            c=args.init_channels,
            num_classes=args.nclass,
            meta_node_num=args.middle_nodes,
            layers=args.layers,
            dp=args.dropout_prob,
            use_sharing=args.use_sharing,
            double_down_channel=args.double_down_channel,
            aux=args.aux
        )

    # normal+up 
    elif args.model == "dd_normalup":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'alpha0_5_stage1_double_deep_ep80'
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPruneNormalUp(
            genotype=genotype,
            input_c=args.in_channels,
            c=args.init_channels,
            num_classes=args.nclass,
            meta_node_num=args.middle_nodes,
            layers=args.layers,
            dp=args.dropout_prob,
            use_sharing=args.use_sharing,
            double_down_channel=args.double_down_channel,
            aux=args.aux
        )

    # normal+up+down
    elif args.model == "alpha0_5_stage1_double_deep_ep80":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'alpha0_5_stage1_double_deep_ep80'
        model_alphas = None
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(
            genotype=genotype,
            input_c=args.in_channels,
            c=args.init_channels,
            num_classes=args.nclass,
            meta_node_num=args.middle_nodes,
            layers=args.layers,
            dp=args.dropout_prob,
            use_sharing=args.use_sharing,
            double_down_channel=args.double_down_channel,
            aux=args.aux
        )

    # ablation study of channel doubling and deep supervision
    elif args.model == "alpha0_5_stage1_double_nodeep_ep80":
        args.deepsupervision = False
        args.double_down_channel = True
        args.genotype_name = 'alpha0_5_stage1_double_nodeep_ep80'
        model_alphas = None
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(
            genotype=genotype,
            input_c=args.in_channels,
            c=args.init_channels,
            num_classes=args.nclass,
            meta_node_num=args.middle_nodes,
            layers=args.layers,
            dp=args.dropout_prob,
            use_sharing=args.use_sharing,
            double_down_channel=args.double_down_channel,
            aux=args.aux
        )

    elif args.model == "alpha0_5_stage1_nodouble_deep_ep80":
        args.deepsupervision = True
        args.double_down_channel = False
        args.genotype_name = 'alpha0_5_stage1_nodouble_deep_ep80'
        model_alphas = None
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(
            genotype=genotype,
            input_c=args.in_channels,
            c=args.init_channels,
            num_classes=args.nclass,
            meta_node_num=args.middle_nodes,
            layers=args.layers,
            dp=args.dropout_prob,
            use_sharing=args.use_sharing,
            double_down_channel=args.double_down_channel,
            aux=args.aux
        )

    elif args.model == "alpha0_5_stage1_nodouble_nodeep_ep80":
        args.deepsupervision = False
        args.double_down_channel = False
        args.genotype_name = 'alpha0_5_stage1_nodouble_nodeep_ep80'
        model_alphas = None
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(
            genotype=genotype,
            input_c=args.in_channels,
            c=args.init_channels,
            num_classes=args.nclass,
            meta_node_num=args.middle_nodes,
            layers=args.layers,
            dp=args.dropout_prob,
            use_sharing=args.use_sharing,
            double_down_channel=args.double_down_channel,
            aux=args.aux
        )

    if torch.cuda.device_count() > 1 and args.use_cuda:
        logger.info('use: %d gpus', torch.cuda.device_count())
        model = nn.DataParallel(model)

    setting = {k: v for k, v in args._get_kwargs()}
    logger.info(setting)
    logger.info(genotype)
    logger.info('param size = %fMB', calc_parameters_count(model))
    # init loss
    if args.loss == 'bce':
        criterion = nn.BCELoss()
    elif args.loss == 'bcelog':
        criterion = nn.BCEWithLogitsLoss()
    elif args.loss == "dice":
        criterion = DiceLoss()
    elif args.loss == "softdice":
        criterion = SoftDiceLoss()
    elif args.loss == 'bcedice':
        criterion = BCEDiceLoss()
    else:
        criterion = nn.CrossEntropyLoss()
    if args.use_cuda:
        logger.info("load model and criterion to gpu !")
    model = model.to(args.device)
    criterion = criterion.to(args.device)
    # init optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.weight_decay, momentum=args.momentum)
    # init schedulers  Steplr
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epoch)
    # scheduler=torch.optim.lr_scheduler.StepLR(optimizer=optimizer,step_size=30,gamma=0.1,last_epoch=-1)

    ############################### check resume #########################
    start_epoch = 0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            logger.info("Loading model and optimizer from checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=args.device)
            start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            model.load_state_dict(checkpoint['state_dict'])
            scheduler.load_state_dict(checkpoint['scheduler'])
        else:
            raise FileNotFoundError("No checkpoint found at '{}'".format(args.resume))

    #################################### train and val ########################
    max_value = 0
    for epoch in range(start_epoch, args.epoch):
        # lr=adjust_learning_rate(args,optimizer,epoch)
        scheduler.step()
        logger.info('Epoch: %d lr %e', epoch, scheduler.get_lr()[0])
        # train
        if args.deepsupervision:
            mean_loss, value1, value2 = train(args, model, criterion, train_loader, optimizer)
            mr, ms, mp, mf, mjc, md, macc = value1
            mmr, mms, mmp, mmf, mmjc, mmd, mmacc = value2
            logger.info(
                "Epoch:{} Train_Loss:{:.3f} Acc:{:.3f} Dice:{:.3f} Jc:{:.3f}".format(epoch, mean_loss, macc, md, mjc))
            logger.info("                        dmAcc:{:.3f} dmDice:{:.3f} dmJc:{:.3f}".format(mmacc, mmd, mmjc))
            writer.add_scalar('Train/dmAcc', mmacc, epoch)
            writer.add_scalar('Train/dRecall', mmr, epoch)
            writer.add_scalar('Train/dSpecifi', mms, epoch)
            writer.add_scalar('Train/dPrecision', mmp, epoch)
            writer.add_scalar('Train/dF1', mmf, epoch)
            writer.add_scalar('Train/dJc', mmjc, epoch)
            writer.add_scalar('Train/dDice', mmd, epoch)
        else:
            mean_loss, value1 = train(args, model, criterion, train_loader,
                                      optimizer)
            mr, ms, mp, mf, mjc, md, macc = value1
            logger.info(
                "Epoch:{} Train_Loss:{:.3f} Acc:{:.3f} Dice:{:.3f} Jc:{:.3f}".format(epoch, mean_loss, macc, md, mjc))
        # write
        writer.add_scalar('Train/Loss', mean_loss, epoch)
        writer.add_scalar('Train/mAcc', macc, epoch)
        writer.add_scalar('Train/Recall', mr, epoch)
        writer.add_scalar('Train/Specifi', ms, epoch)
        writer.add_scalar('Train/Precision', mp, epoch)
        writer.add_scalar('Train/F1', mf, epoch)
        writer.add_scalar('Train/Jc', mjc, epoch)
        writer.add_scalar('Train/Dice', md, epoch)

        # val
        if args.deepsupervision:
            vmean_loss, valuev1, valuev2 = infer(args, model, criterion, val_loader)
            vmr, vms, vmp, vmf, vmjc, vmd, vmacc = valuev1
            mvmr, mvms, mvmp, mvmf, mvmjc, mvmd, mvmacc = valuev2
            logger.info(
                "Epoch:{} Val_Loss:{:.3f} Acc:{:.3f} Dice:{:.3f} Jc:{:.3f}".format(epoch, vmean_loss, vmacc, vmd, vmjc))
            logger.info("                        dmAcc:{:.3f} dmDice:{:.3f} dmJc:{:.3f}".format(mvmacc, mvmd, mvmjc))
            writer.add_scalar('Val/mAcc', mvmacc, epoch)
            writer.add_scalar('Val/Recall', mvmr, epoch)
            writer.add_scalar('Val/Specifi', mvms, epoch)
            writer.add_scalar('Val/Precision', mvmp, epoch)
            writer.add_scalar('Val/F1', mvmf, epoch)
            writer.add_scalar('Val/Jc', mvmjc, epoch)
            writer.add_scalar('Val/Dice', mvmd, epoch)
        else:
            vmean_loss, valuev1 = infer(args, model, criterion, val_loader)
            vmr, vms, vmp, vmf, vmjc, vmd, vmacc = valuev1
            logger.info(
                "Epoch:{} Val_Loss:{:.3f} Acc:{:.3f} Dice:{:.3f} Jc:{:.3f}".format(epoch, vmean_loss, vmacc, vmd, vmjc))

        is_best = vmjc >= max_value
        max_value = max(max_value, vmjc)
        writer.add_scalar('Val/Loss', vmean_loss, epoch)
        writer.add_scalar('Val/mAcc', vmacc, epoch)
        writer.add_scalar('Val/Recall', vmr, epoch)
        writer.add_scalar('Val/Specifi', vms, epoch)
        writer.add_scalar('Val/Precision', vmp, epoch)
        writer.add_scalar('Val/F1', vmf, epoch)
        writer.add_scalar('Val/Jc', vmjc, epoch)
        writer.add_scalar('Val/Dice', vmd, epoch)

        state = {
            'epoch': epoch,
            'optimizer': optimizer.state_dict(),
            'state_dict': model.state_dict(),
            'scheduler': scheduler.state_dict(),
        }
        logger.info("epoch:{} best:{} max_value:{}".format(epoch,is_best,max_value))
        if not is_best:
            torch.save(state,os.path.join(args.save_path,"checkpoint.pth.tar"))
        else:
            torch.save(state,os.path.join(args.save_path,"checkpoint.pth.tar"))
            torch.save(state,os.path.join(args.save_path,"model_best.pth.tar"))

    writer.close()
Example #18
def main(args):
    ############    init config ################
    #################### init logger ###################################
    log_dir = './search_exp/{}/{}/{}_{}'.format(
        args.model, args.dataset, time.strftime('%Y%m%d-%H%M%S'), args.note)

    logger = get_logger(log_dir)
    print('RUNDIR: {}'.format(log_dir))
    logger.info('{}-Search'.format(args.model))
    args.save_path = log_dir
    args.save_tbx_log = args.save_path + '/tbx_log'
    writer = SummaryWriter(args.save_tbx_log)
    ##################### init device #################################
    if args.manualSeed is None:
        args.manualSeed = random.randint(1, 10000)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    args.use_cuda = args.gpus > 0 and torch.cuda.is_available()
    args.multi_gpu = args.gpus > 1 and torch.cuda.is_available()
    args.device = torch.device('cuda:0' if args.use_cuda else 'cpu')
    if args.use_cuda:
        torch.cuda.manual_seed(args.manualSeed)
        cudnn.enabled = True
        cudnn.benchmark = True
    setting = {k: v for k, v in args._get_kwargs()}
    logger.info(setting)

    ####################### init dataset ###########################################
    logger.info("Dataset for search is {}".format(args.dataset))
    train_dataset = datasets_dict[args.dataset](args,
                                                args.dataset_root,
                                                split='train')
    val_dataset = datasets_dict[args.dataset](args,
                                              args.dataset_root,
                                              split='valid')
    # train_dataset=datasets_dict[args.dataset](args,split='train')
    # val_dataset=datasets_dict[args.dataset](args,split='valid')
    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))
    # init loss
    if args.loss == 'bce':
        criterion = nn.BCELoss()
    elif args.loss == 'bcelog':
        criterion = nn.BCEWithLogitsLoss()
    elif args.loss == "dice":
        criterion = DiceLoss()
    elif args.loss == "softdice":
        criterion = SoftDiceLoss()
    elif args.loss == 'bcedice':
        criterion = BCEDiceLoss()
    else:
        criterion = nn.CrossEntropyLoss()
    if args.use_cuda:
        logger.info("load criterion to gpu !")
    criterion = criterion.to(args.device)
    ######################## init model ############################################
    switches_normal = []
    switches_down = []
    switches_up = []
    nums_mixop = sum([2 + i for i in range(args.meta_node_num)])
    for i in range(nums_mixop):
        switches_normal.append([True for j in range(len(CellPos))])
    for i in range(nums_mixop):
        switches_down.append([True for j in range(len(CellLinkDownPos))])
    for i in range(nums_mixop):
        switches_up.append([True for j in range(len(CellLinkUpPos))])
    # 6-->3-->1
    drop_op_down = [2, 3]
    # 4-->2-->1
    drop_op_up = [2, 1]
    # 7-->4-->1
    drop_op_normal = [3, 3]
    # stage 0 pruning, stage 1 pruning, stage 2 (training)
    original_train_batch = args.train_batch
    original_val_batch = args.val_batch
    for sp in range(2):
        # build dataloader
        # model: num_class=1, im_ch=3, init_channel=16, intermediate_nodes=4, layers=9
        if sp == 0:
            args.model = "UnetLayer7"
            args.layers = 7
            sp_train_batch = original_train_batch
            sp_val_batch = original_val_batch
            sp_epoch = args.epochs
            sp_lr = args.lr
        else:
            #args.model = "UnetLayer9"
            # 在算力平台上面UnetLayer9就是UnetLayer9_v2
            args.model = "UnetLayer9"
            args.layers = 9
            sp_train_batch = original_train_batch
            sp_val_batch = original_val_batch
            sp_lr = args.lr
            sp_epoch = args.epochs

        train_queue = data.DataLoader(
            train_dataset,
            batch_size=sp_train_batch,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(
                indices[:split]),
            pin_memory=True,
            num_workers=args.num_workers)
        val_queue = data.DataLoader(
            train_dataset,
            batch_size=sp_train_batch,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(
                indices[split:num_train]),
            pin_memory=True,
            num_workers=args.num_workers)
        test_dataloader = data.DataLoader(val_dataset,
                                          batch_size=sp_val_batch,
                                          pin_memory=True,
                                          num_workers=args.num_workers)
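
        # The two queues split train_dataset at args.train_portion: train_queue
        # fits the network weights, val_queue drives the architecture (alpha)
        # updates and the periodic infer() scoring, and test_dataloader wraps
        # the held-out val_dataset.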
        logger.info(
            "stage:{} model:{} epoch:{} lr:{} train_batch:{} val_batch:{}".
            format(sp, args.model, sp_epoch, sp_lr, sp_train_batch,
                   sp_val_batch))

        model = get_models(args, switches_normal, switches_down, switches_up)
        save_model_path = os.path.join(args.save_path,
                                       "stage_{}_model".format(sp))
        if not os.path.exists(save_model_path):
            os.mkdir(save_model_path)
        if args.multi_gpu:
            logger.info('use: %d gpus', args.gpus)
            model = nn.DataParallel(model)
        model = model.to(args.device)
        logger.info('param size = %fMB', calc_parameters_count(model))
        # init optimizer for arch parameters and weight parameters
        # final stage, just train the network parameters
        optimizer_arch = torch.optim.Adam(model.arch_parameters(),
                                          lr=args.arch_lr,
                                          betas=(0.5, 0.999),
                                          weight_decay=args.arch_weight_decay)
        optimizer_weight = torch.optim.SGD(model.weight_parameters(),
                                           lr=sp_lr,
                                           weight_decay=args.weight_decay,
                                           momentum=args.momentum)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer_weight, sp_epoch, eta_min=args.lr_min)
        #################################### train and val ########################
        max_value = 0
        for epoch in range(0, sp_epoch):
            # lr=adjust_learning_rate(args,optimizer,epoch)
            scheduler.step()
            logger.info('Epoch: %d lr %e', epoch, scheduler.get_lr()[0])
            # train
            if epoch < args.arch_after:
                weight_loss_avg, arch_loss_avg, mr, ms, mp, mf, mjc, md, macc = train(
                    args,
                    train_queue,
                    val_queue,
                    model,
                    criterion,
                    optimizer_weight,
                    optimizer_arch,
                    train_arch=False)
            else:
                weight_loss_avg, arch_loss_avg, mr, ms, mp, mf, mjc, md, macc = train(
                    args,
                    train_queue,
                    val_queue,
                    model,
                    criterion,
                    optimizer_weight,
                    optimizer_arch,
                    train_arch=True)
            logger.info("Epoch:{} WeightLoss:{:.3f}  ArchLoss:{:.3f}".format(
                epoch, weight_loss_avg, arch_loss_avg))
            logger.info("         Acc:{:.3f}   Dice:{:.3f}  Jc:{:.3f}".format(
                macc, md, mjc))
            # write
            writer.add_scalar('Train/W_loss', weight_loss_avg, epoch)
            writer.add_scalar('Train/A_loss', arch_loss_avg, epoch)
            writer.add_scalar('Train/Dice', md, epoch)
            # infer
            if (epoch + 1) % args.infer_epoch == 0:
                genotype = model.genotype()
                logger.info('genotype = %s', genotype)
                val_loss, (vmr, vms, vmp, vmf, vmjc, vmd,
                           vmacc) = infer(args, model, val_queue, criterion)
                logger.info(
                    "ValLoss:{:.3f} ValAcc:{:.3f}  ValDice:{:.3f} ValJc:{:.3f}"
                    .format(val_loss, vmacc, vmd, vmjc))
                writer.add_scalar('Val/loss', val_loss, epoch)

                is_best = vmjc >= max_value
                max_value = max(max_value, vmjc)
                state = {
                    'epoch': epoch,
                    'optimizer_arch': optimizer_arch.state_dict(),
                    'optimizer_weight': optimizer_weight.state_dict(),
                    'scheduler': scheduler.state_dict(),
                    'state_dict': model.state_dict(),
                    'alphas_dict': model.alphas_dict(),
                }
                logger.info("epoch:{} best:{} max_value:{}".format(
                    epoch, is_best, max_value))
                # always save the latest checkpoint; keep a copy when best
                torch.save(
                    state,
                    os.path.join(save_model_path, "checkpoint.pth.tar"))
                if is_best:
                    torch.save(
                        state,
                        os.path.join(save_model_path, "model_best.pth.tar"))

        # one stage end, we should change the operations num (divided 2)
        weight_down = F.softmax(model.arch_parameters()[0],
                                dim=-1).data.cpu().numpy()
        weight_up = F.softmax(model.arch_parameters()[1],
                              dim=-1).data.cpu().numpy()
        weight_normal = F.softmax(model.arch_parameters()[2],
                                  dim=-1).data.cpu().numpy()
        weight_network = F.softmax(model.arch_parameters()[3],
                                   dim=-1).data.cpu().numpy()
        logger.info("alphas_down: \n{}".format(weight_down))
        logger.info("alphas_up: \n{}".format(weight_up))
        logger.info("alphas_normal: \n{}".format(weight_normal))
        logger.info("alphas_network: \n{}".format(weight_network))

        genotype = model.genotype()
        logger.info('Stage:{} \n  Genotype: {}'.format(sp, genotype))
        logger.info(
            '------Stage {} end ! Then  Dropping Paths------'.format(sp))
        # op counts per cell type: CellLinkDownPos (6), CellLinkUpPos (4),
        # CellPos (7); pruned per stage by drop_op_down / drop_op_up /
        # drop_op_normal defined above.
        # update switches at the end of stage 0
        if sp == 0:
            switches_down = update_switches(weight_down.copy(),
                                            switches_down.copy(),
                                            CellLinkDownPos, drop_op_down[sp])
            switches_up = update_switches(weight_up.copy(), switches_up.copy(),
                                          CellLinkUpPos, drop_op_up[sp])
            switches_normal = update_switches(weight_normal.copy(),
                                              switches_normal.copy(), CellPos,
                                              drop_op_normal[sp])
            logger.info('switches_down = %s', switches_down)
            logger.info('switches_up = %s', switches_up)
            logger.info('switches_normal = %s', switches_normal)
            logging_switches(logger, switches_down, CellLinkDownPos)
            logging_switches(logger, switches_up, CellLinkUpPos)
            logging_switches(logger, switches_normal, CellPos)
        else:
            # sp == 1 is the final stage: we don't need to keep extra
            # operations because model.genotype() already records the result;
            # just show the single remaining op in each of the 14 mixops.
            switches_down = update_switches(weight_down.copy(),
                                            switches_down.copy(),
                                            CellLinkDownPos, drop_op_down[sp])
            switches_up = update_switches(weight_up.copy(), switches_up.copy(),
                                          CellLinkUpPos, drop_op_up[sp])
            switches_normal = update_switches_nozero(weight_normal.copy(),
                                                     switches_normal.copy(),
                                                     CellPos,
                                                     drop_op_normal[sp])
            logger.info('switches_down = %s', switches_down)
            logger.info('switches_up = %s', switches_up)
            logger.info('switches_normal = %s', switches_normal)
            logging_switches(logger, switches_down, CellLinkDownPos)
            logging_switches(logger, switches_up, CellLinkUpPos)
            logging_switches(logger, switches_normal, CellPos)
    writer.close()
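
    # Editor's sketch of the pruning step (an assumption — update_switches is
    # defined elsewhere in this repository): given the softmax weights of the
    # ops still switched on, turn off the num_to_drop weakest ones per mixop.
    #
    #     def update_switches_sketch(weights, switches, num_to_drop):
    #         new_switches = [list(sw) for sw in switches]
    #         for w, sw, new in zip(weights, switches, new_switches):
    #             active = [j for j, on in enumerate(sw) if on]  # w's columns
    #             weakest = sorted(range(len(active)),
    #                              key=lambda k: w[k])[:num_to_drop]
    #             for k in weakest:
    #                 new[active[k]] = False
    #         return new_switches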
Example #19
def train(cont=False):

    # for tensorboard tracking
    logger = get_logger()
    logger.info("(1) Initiating Training ... ")
    logger.info("Training on device: {}".format(device))
    writer = SummaryWriter()

    # init model
    aux_layers = None
    if net == "SETR-PUP":
        aux_layers, model = get_SETR_PUP()
    elif net == "SETR-MLA":
        aux_layers, model = get_SETR_MLA()
    elif net == "TransUNet-Base":
        model = get_TransUNet_base()
    elif net == "TransUNet-Large":
        model = get_TransUNet_large()
    elif net == "UNet":
        model = UNet(CLASS_NUM)

    # prepare dataset
    cluster_model = get_clustering_model(logger)
    train_dataset = CityscapeDataset(img_dir=data_dir,
                                     img_dim=IMG_DIM,
                                     mode="train",
                                     cluster_model=cluster_model)
    valid_dataset = CityscapeDataset(img_dir=data_dir,
                                     img_dim=IMG_DIM,
                                     mode="val",
                                     cluster_model=cluster_model)
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=batch_size,
                              shuffle=False)

    logger.info("(2) Dataset Initiated. ")

    # optimizer
    epochs = epoch_num if epoch_num > 0 else iteration_num // len(
        train_loader) + 1
    optim = SGD(model.parameters(),
                lr=lrate,
                momentum=momentum,
                weight_decay=wdecay)
    # optim = Adam(model.parameters(), lr=lrate)
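    # MultiStepLR cuts the learning rate by 10x once training crosses the
    # fine-tune boundary at int(epochs * fine_tune_ratio) epochs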
    scheduler = lr_scheduler.MultiStepLR(
        optim, milestones=[int(epochs * fine_tune_ratio)], gamma=0.1)

    cur_epoch = 0
    best_loss = float('inf')
    epochs_since_improvement = 0

    # for continue training
    if cont:
        model, optim, cur_epoch, best_loss = load_ckpt_continue_training(
            best_ckpt_src, model, optim, logger)
        logger.info("Current best loss: {0}".format(best_loss))
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            for i in range(cur_epoch):
                scheduler.step()
    else:
        model = nn.DataParallel(model)
        model = model.to(device)

    logger.info("(3) Model Initiated ... ")
    logger.info("Training model: {}".format(net) + ". Training Started.")

    # loss
    ce_loss = CrossEntropyLoss()
    if use_dice_loss:
        dice_loss = DiceLoss(CLASS_NUM)

    # loop over epochs
    iter_count = 0
    epoch_bar = tqdm.tqdm(total=epochs,
                          desc="Epoch",
                          position=cur_epoch,
                          leave=True)
    logger.info("Total epochs: {0}. Starting from epoch {1}.".format(
        epochs, cur_epoch + 1))

    for e in range(epochs - cur_epoch):
        epoch = e + cur_epoch

        # Training.
        model.train()
        trainLossMeter = LossMeter()
        train_batch_bar = tqdm.tqdm(total=len(train_loader),
                                    desc="TrainBatch",
                                    position=0,
                                    leave=True)

        for batch_num, (orig_img, mask_img) in enumerate(train_loader):
            orig_img, mask_img = orig_img.float().to(
                device), mask_img.float().to(device)

            if net == "TransUNet-Base" or net == "TransUNet-Large":
                pred = model(orig_img)
            elif net == "SETR-PUP" or net == "SETR-MLA":
                if aux_layers is not None:
                    pred, _ = model(orig_img)
                else:
                    pred = model(orig_img)
            elif net == "UNet":
                pred = model(orig_img)

            loss_ce = ce_loss(pred, mask_img[:].long())
            if use_dice_loss:
                loss_dice = dice_loss(pred, mask_img, softmax=True)
                loss = 0.5 * (loss_ce + loss_dice)
            else:
                loss = loss_ce

            # Backward Propagation, Update weight and metrics
            optim.zero_grad()
            loss.backward()
            optim.step()

            # update learning rate
            for param_group in optim.param_groups:
                orig_lr = param_group['lr']
                param_group['lr'] = orig_lr * (1.0 -
                                               iter_count / iteration_num)**0.9
            iter_count += 1

            # Update loss
            trainLossMeter.update(loss.item())

            # print status
            if (batch_num + 1) % print_freq == 0:
                status = 'Epoch: [{0}][{1}/{2}]\t' \
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(epoch+1, batch_num+1, len(train_loader), loss=trainLossMeter)
                logger.info(status)

            # log loss to tensorboard
            if (batch_num + 1) % tensorboard_freq == 0:
                writer.add_scalar(
                    'Train_Loss_{0}'.format(tensorboard_freq),
                    trainLossMeter.avg,
                    epoch * (len(train_loader) / tensorboard_freq) +
                    (batch_num + 1) / tensorboard_freq)
            train_batch_bar.update(1)

        writer.add_scalar('Train_Loss_epoch', trainLossMeter.avg, epoch)

        # Validation.
        model.eval()
        validLossMeter = LossMeter()
        valid_batch_bar = tqdm.tqdm(total=len(valid_loader),
                                    desc="ValidBatch",
                                    position=0,
                                    leave=True)
        with torch.no_grad():
            for batch_num, (orig_img, mask_img) in enumerate(valid_loader):
                orig_img, mask_img = orig_img.float().to(
                    device), mask_img.float().to(device)

                if net == "TransUNet-Base" or net == "TransUNet-Large":
                    pred = model(orig_img)
                elif net == "SETR-PUP" or net == "SETR-MLA":
                    if aux_layers is not None:
                        pred, _ = model(orig_img)
                    else:
                        pred = model(orig_img)
                elif net == "UNet":
                    pred = model(orig_img)

                loss_ce = ce_loss(pred, mask_img[:].long())
                if use_dice_loss:
                    loss_dice = dice_loss(pred, mask_img, softmax=True)
                    loss = 0.5 * (loss_ce + loss_dice)
                else:
                    loss = loss_ce

                # Update loss
                validLossMeter.update(loss.item())

                # print status
                if (batch_num + 1) % print_freq == 0:
                    status = 'Validation: [{0}][{1}/{2}]\t' \
                        'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(epoch+1, batch_num+1, len(valid_loader), loss=validLossMeter)
                    logger.info(status)

                # log loss to tensorboard
                if (batch_num + 1) % tensorboard_freq == 0:
                    writer.add_scalar(
                        'Valid_Loss_{0}'.format(tensorboard_freq),
                        validLossMeter.avg,
                        epoch * (len(valid_loader) / tensorboard_freq) +
                        (batch_num + 1) / tensorboard_freq)
                valid_batch_bar.update(1)

        valid_loss = validLossMeter.avg
        writer.add_scalar('Valid_Loss_epoch', valid_loss, epoch)
        logger.info("Validation Loss of epoch [{0}/{1}]: {2}\n".format(
            epoch + 1, epochs, valid_loss))

        # update optim scheduler
        scheduler.step()

        # save checkpoint
        is_best = valid_loss < best_loss
        best_loss_tmp = min(valid_loss, best_loss)
        if not is_best:
            epochs_since_improvement += 1
            logger.info("Epochs since last improvement: %d\n" %
                        (epochs_since_improvement, ))
            if epochs_since_improvement == early_stop_tolerance:
                break  # early stopping.
        else:
            epochs_since_improvement = 0
            state = {
                'epoch': epoch,
                'loss': best_loss_tmp,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optim.state_dict(),
            }
            torch.save(state, ckpt_src)
            logger.info("Checkpoint updated.")
            best_loss = best_loss_tmp
        epoch_bar.update(1)
    writer.close()
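
The in-loop learning-rate updates here and in the trainers below follow the "poly" decay schedule (note that this example applies the decay factor to the current rate rather than the base rate, so the decay compounds). A standalone sketch of the base-rate form; the helper name is an illustration, not part of the original code:

def poly_lr(base_lr, cur_iter, max_iter, power=0.9):
    """Polynomial decay: the LR falls from base_lr toward 0 over max_iter steps."""
    return base_lr * (1.0 - cur_iter / max_iter) ** power

# e.g. poly_lr(0.01, 0, 100) == 0.01, decaying to 0.0 at step 100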
Example #20
def main(args):

    #################### init logger ###################################
    log_dir = './logs/' + '{}'.format(args.dataset) + '/{}_{}_{}'.format(
        args.model, args.note, time.strftime('%Y%m%d-%H%M%S'))

    logger = get_logger(log_dir)
    print('RUNDIR: {}'.format(log_dir))
    logger.info('{}-Train'.format(args.model))
    # setting
    setting = {k: v for k, v in args._get_kwargs()}
    logger.info(setting)
    args.save_path = log_dir
    args.save_tbx_log = args.save_path + '/tbx_log'
    writer = SummaryWriter(args.save_tbx_log)
    ##################### init device #################################
    if args.manualSeed is None:
        args.manualSeed = random.randint(1, 10000)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    args.use_cuda = args.gpus > 0 and torch.cuda.is_available()
    args.device = torch.device('cuda' if args.use_cuda else 'cpu')
    if args.use_cuda:
        torch.cuda.manual_seed(args.manualSeed)
        cudnn.benchmark = True
    ####################### init dataset ###########################################
    train_loader = get_dataloder(args, split_flag="train")
    val_loader = get_dataloder(args, split_flag="valid")
    ######################## init model ############################################
    if args.model == "nodouble_deep_init32_ep100":
        args.deepsupervision = True
        args.double_down_channel = False
        args.genotype_name = 'nodouble_deep_init32_ep100'
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=32,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=9,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    elif args.model == "nodouble_deep_isic":
        args.deepsupervision = True
        args.double_down_channel = False
        args.genotype_name = 'stage1_layer9_110epoch_deep_final'
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    elif args.model == "nodouble_deep_drop02_layer7end":
        args.deepsupervision = True
        args.double_down_channel = False
        args.genotype_name = 'nodouble_deep_drop02_layer7end'
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    elif args.model == "stage1_nodouble_deep_ep36":
        args.deepsupervision = True
        args.double_down_channel = False
        args.genotype_name = 'stage1_nodouble_deep_ep36'
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    elif args.model == "stage1_nodouble_deep_ep63":
        args.deepsupervision = True
        args.double_down_channel = False
        args.genotype_name = 'stage1_nodouble_deep_ep63'
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)
    elif args.model == "stage1_nodouble_deep_ep83":
        args.deepsupervision = True
        args.double_down_channel = False
        args.genotype_name = 'stage1_nodouble_deep_ep83'
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    elif args.model == "alpha1_stage1_double_deep_ep80":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'alpha1_stage1_double_deep_ep80'
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    elif args.model == "alpha0_stage1_double_deep_ep80":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'alpha0_stage1_double_deep_ep80'
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    elif args.model == "alpha0_5_stage1_double_deep_ep80":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'alpha0_5_stage1_double_deep_ep80'
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    # isic trans
    elif args.model == "stage1_layer9_110epoch_double_deep_final":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'stage1_layer9_110epoch_double_deep_final'
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    #chaos
    elif args.model == "stage0_double_deep_ep80_newim":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'stage0_double_deep_ep80_newim'
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    elif args.model == "stage1_double_deep_ep80":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'stage1_double_deep_ep80'
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    elif args.model == "stage1_double_deep_ep80_ts":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'stage1_double_deep_ep80_ts'
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    # cvc trans
    elif args.model == "layer7_double_deep":
        args.deepsupervision = True
        args.double_down_channel = True
        args.genotype_name = 'layer7_double_deep'
        genotype = eval('genotypes.%s' % args.genotype_name)
        model = BuildNasUnetPrune(genotype=genotype,
                                  input_c=args.in_channels,
                                  c=args.init_channels,
                                  num_classes=args.nclass,
                                  meta_node_num=args.middle_nodes,
                                  layers=args.layers,
                                  dp=args.dropout_prob,
                                  use_sharing=args.use_sharing,
                                  double_down_channel=args.double_down_channel,
                                  aux=args.aux)

    if torch.cuda.device_count() > 1 and args.use_cuda:
        logger.info('use: %d gpus', torch.cuda.device_count())
        model = nn.DataParallel(model)
    setting = {k: v for k, v in args._get_kwargs()}
    logger.info(setting)
    logger.info(genotype)
    logger.info('param size = %fMB', calc_parameters_count(model))

    # init loss
    if args.loss == 'bce':
        criterion = nn.BCELoss()
    elif args.loss == 'bcelog':
        criterion = nn.BCEWithLogitsLoss()
    elif args.loss == "dice":
        criterion = DiceLoss()
    elif args.loss == "softdice":
        criterion = SoftDiceLoss()
    elif args.loss == 'bcedice':
        criterion = BCEDiceLoss()
    elif args.loss == 'multibcedice':
        criterion = MultiClassEntropyDiceLoss()
    else:
        criterion = nn.CrossEntropyLoss()
    if args.use_cuda:
        logger.info("load model and criterion to gpu !")
        model = model.to(args.device)
        criterion = criterion.to(args.device)
    # init optimizer
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                weight_decay=args.weight_decay,
                                momentum=args.momentum)
    # init scheduler: CosineAnnealingLR (a StepLR alternative is commented out below)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.epoch)
    # scheduler=torch.optim.lr_scheduler.StepLR(optimizer=optimizer,step_size=30,gamma=0.1,last_epoch=-1)
    ############################### check resume #########################
    start_epoch = 0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            logger.info(
                "Loading model and optimizer from checkpoint '{}'".format(
                    args.resume))
            checkpoint = torch.load(args.resume, map_location=args.device)
            start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            model.load_state_dict(checkpoint['state_dict'])
            scheduler.load_state_dict(checkpoint['scheduler'])
        else:
            raise FileNotFoundError("No checkpoint found at '{}'".format(
                args.resume))

    #################################### train and val ########################

    max_value = 0
    for epoch in range(start_epoch, args.epoch):
        # lr=adjust_learning_rate(args,optimizer,epoch)
        scheduler.step()
        # logger.info('Epoch: %d lr %e', epoch, scheduler.get_lr()[0])
        # train
        total_loss = train(args, model, criterion, train_loader, optimizer,
                           epoch, logger)
        # write
        writer.add_scalar('Train/total_loss', total_loss, epoch)
        # val
        tloss, md = val(args, model, criterion, val_loader, epoch, logger)
        writer.add_scalar('Val/total_loss', tloss, epoch)

        is_best = md >= max_value
        max_value = max(max_value, md)
        state = {
            'epoch': epoch,
            'optimizer': optimizer.state_dict(),
            'state_dict': model.state_dict(),
            'scheduler': scheduler.state_dict(),  # the resume block loads this key into the scheduler
        }
        logger.info("epoch:{} best:{} max_value:{}".format(
            epoch, is_best, max_value))
        # always refresh the rolling checkpoint; additionally snapshot the best model
        torch.save(state, os.path.join(args.save_path, "checkpoint.pth.tar"))
        if is_best:
            torch.save(state, os.path.join(args.save_path,
                                           "model_best.pth.tar"))
    writer.close()
Example #21
def trainer_synapse(args, model, snapshot_path):
    from datasets.dataset_synapse import Synapse_dataset, RandomGenerator
    logging.basicConfig(filename=snapshot_path + "/log.txt", level=logging.INFO,
                        format='[%(asctime)s.%(msecs)03d] %(message)s', datefmt='%H:%M:%S')
    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
    logging.info(str(args))
    base_lr = args.base_lr
    num_classes = args.num_classes
    batch_size = args.batch_size * args.n_gpu
    # max_iterations = args.max_iterations
    db_train = Synapse_dataset(base_dir=args.root_path, list_dir=args.list_dir, split="train",
                               transform=transforms.Compose(
                                   [RandomGenerator(output_size=[args.img_size, args.img_size])]))
    print("The length of train set is: {}".format(len(db_train)))

    def worker_init_fn(worker_id):
        random.seed(args.seed + worker_id)

    trainloader = DataLoader(db_train, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True,
                             worker_init_fn=worker_init_fn)
    if args.n_gpu > 1:
        model = nn.DataParallel(model)
    model.train()
    ce_loss = CrossEntropyLoss()
    dice_loss = DiceLoss(num_classes)
    optimizer = optim.SGD(model.parameters(), lr=base_lr, momentum=0.9, weight_decay=0.0001)
    writer = SummaryWriter(snapshot_path + '/log')
    iter_num = 0
    max_epoch = args.max_epochs
    max_iterations = args.max_epochs * len(trainloader)  # max_epoch = max_iterations // len(trainloader) + 1
    logging.info("{} iterations per epoch. {} max iterations ".format(len(trainloader), max_iterations))
    best_performance = 0.0
    iterator = tqdm(range(max_epoch), ncols=70)
    for epoch_num in iterator:
        for i_batch, sampled_batch in enumerate(trainloader):
            image_batch, label_batch = sampled_batch['image'], sampled_batch['label']
            image_batch, label_batch = image_batch.cuda(), label_batch.cuda()
            outputs = model(image_batch)
            loss_ce = ce_loss(outputs, label_batch[:].long())
            loss_dice = dice_loss(outputs, label_batch, softmax=True)
            loss = 0.5 * loss_ce + 0.5 * loss_dice
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            lr_ = base_lr * (1.0 - iter_num / max_iterations) ** 0.9
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr_

            iter_num = iter_num + 1
            writer.add_scalar('info/lr', lr_, iter_num)
            writer.add_scalar('info/total_loss', loss, iter_num)
            writer.add_scalar('info/loss_ce', loss_ce, iter_num)

            logging.info('iteration %d : loss : %f, loss_ce: %f' % (iter_num, loss.item(), loss_ce.item()))

            if iter_num % 20 == 0:
                image = image_batch[1, 0:1, :, :]
                image = (image - image.min()) / (image.max() - image.min())
                writer.add_image('train/Image', image, iter_num)
                outputs = torch.argmax(torch.softmax(outputs, dim=1), dim=1, keepdim=True)
                writer.add_image('train/Prediction', outputs[1, ...] * 50, iter_num)
                labs = label_batch[1, ...].unsqueeze(0) * 50
                writer.add_image('train/GroundTruth', labs, iter_num)

        save_interval = 50  # int(max_epoch/6)
        if epoch_num > int(max_epoch / 2) and (epoch_num + 1) % save_interval == 0:
            save_mode_path = os.path.join(snapshot_path, 'epoch_' + str(epoch_num) + '.pth')
            torch.save(model.state_dict(), save_mode_path)
            logging.info("save model to {}".format(save_mode_path))

        if epoch_num >= max_epoch - 1:
            save_mode_path = os.path.join(snapshot_path, 'epoch_' + str(epoch_num) + '.pth')
            torch.save(model.state_dict(), save_mode_path)
            logging.info("save model to {}".format(save_mode_path))
            iterator.close()
            break

    writer.close()
    return "Training Finished!"
Example #22
def trainer_HuBMAP(args, model, snapshot_path):
    from datasets.dataset_HuBMAP import HuBMAP_dataset, RandomGenerator, Generator
    logging.basicConfig(filename=snapshot_path + "/log.txt", level=logging.INFO,
                        format='[%(asctime)s.%(msecs)03d] %(message)s', datefmt='%H:%M:%S')
    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
    logging.info(str(args))
    base_lr = args.base_lr
    num_classes = args.num_classes
    batch_size = args.batch_size * args.n_gpu
    # max_iterations = args.max_iterations
    db_train = HuBMAP_dataset(base_dir=args.root_path, list_dir=args.list_dir, split="train",
                               transform=transforms.Compose(
                                   [RandomGenerator(output_size=[args.img_size, args.img_size])]))
    print("The length of train set is: {}".format(len(db_train)))

    ### validation set
    db_val = HuBMAP_dataset(base_dir=args.root_path, list_dir=args.list_dir, split="val",
                               transform=transforms.Compose(
                                   [Generator(output_size=[args.img_size, args.img_size])]))
    print("The length of val set is: {}".format(len(db_val)))

    def worker_init_fn(worker_id):
        random.seed(args.seed + worker_id)

    trainloader = DataLoader(db_train, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True,
                             worker_init_fn=worker_init_fn)   
    ### val loader (no need to shuffle for evaluation)
    valloader = DataLoader(db_val, batch_size=batch_size, shuffle=False, num_workers=8, pin_memory=True,
                             worker_init_fn=worker_init_fn)
    
    if args.n_gpu > 1:
        model = nn.DataParallel(model)
    model.train()
    ce_loss = CrossEntropyLoss()
    dice_loss = DiceLoss(num_classes)
    optimizer = optim.SGD(model.parameters(), lr=base_lr, momentum=0.9, weight_decay=0.0001)
    writer = SummaryWriter(snapshot_path + '/log')
    iter_num = 0
    max_epoch = args.max_epochs
    max_iterations = args.max_epochs * len(trainloader)  # max_epoch = max_iterations // len(trainloader) + 1
    logging.info("{} iterations per epoch. {} max iterations ".format(len(trainloader), max_iterations))
    best_performance = 0.0
    low_val_loss_dice = np.inf 
    

    train_loss_list = []
    train_loss_dice_list = []
    val_loss_list = []
    val_loss_dice_list = []
    for epoch_num in range(max_epoch):
        total_train_loss = 0
        total_train_dice_loss = 0
        batch_num = 0
        for i_batch, sampled_batch in enumerate(trainloader):

            print("epoch: "+ str(epoch_num) + " training progress: {:.2f}".format(batch_num/len(trainloader)*100) + "%", end="\r")
            image_batch, label_batch = sampled_batch['image'], sampled_batch['label']
            image_batch, label_batch = image_batch.cuda(), label_batch.cuda()
            outputs = model(image_batch)
            loss_ce = ce_loss(outputs, label_batch[:].long())
            loss_dice = dice_loss(outputs, label_batch, softmax=True)
            loss = 0.5 * loss_ce + 0.5 * loss_dice
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            lr_ = base_lr * (1.0 - iter_num / max_iterations) ** 0.9
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr_
            ### get total loss
            total_train_loss += loss.item()
            total_train_dice_loss += loss_dice.item()
            ###  
            # update iter_num for the adaptive (poly) learning rate
            # and batch_num for computing the average loss
            writer.add_scalar('info/lr', lr_, iter_num)
            iter_num = iter_num + 1
            batch_num += 1

        avg_train_loss = total_train_loss/batch_num
        avg_train_loss_dice = total_train_dice_loss/batch_num
        writer.add_scalar('info/avg_train_loss', avg_train_loss, epoch_num)
        writer.add_scalar('info/avg_train_loss_dice', avg_train_loss_dice, epoch_num)
        train_loss_list.append(avg_train_loss)
        train_loss_dice_list.append(avg_train_loss_dice)
        np.save('train_loss.npy', train_loss_list)
        np.save('train_loss_dice.npy', train_loss_dice_list)


        if epoch_num % 1 == 0:  # log training images every epoch
            image = image_batch[1, 0:1, :, :]
            image = (image - image.min()) / (image.max() - image.min())
            writer.add_image('train/Image', image, epoch_num)
            outputs = torch.argmax(torch.softmax(outputs, dim=1), dim=1, keepdim=True)
            writer.add_image('train/Prediction', outputs[1, ...] * 50, epoch_num)
            labs = label_batch[1, ...].unsqueeze(0) * 50
            writer.add_image('train/GroundTruth', labs, epoch_num)   


        ######################### VALIDATION ###########################
        total_val_loss = 0
        total_val_dice_loss = 0
        batch_num = 0

        model.eval()
        with torch.no_grad():
            for i_batch, sampled_batch in enumerate(valloader):
                print("epoch: " + str(epoch_num) + " validation progress: {:.2f}".format(batch_num/len(valloader)*100) + "%", end="\r")
                image_batch, label_batch = sampled_batch['image'], sampled_batch['label']
                image_batch, label_batch = image_batch.cuda(), label_batch.cuda()
                outputs = model(image_batch)

                # NB: overwritten every batch, so only the last validation batch is kept
                np.save('val_pred.npy', outputs.detach().cpu().numpy())
                np.save('val_img.npy', image_batch.detach().cpu().numpy())
                np.save('val_label.npy', label_batch.detach().cpu().numpy())

                loss_ce = ce_loss(outputs, label_batch[:].long())
                loss_dice = dice_loss(outputs, label_batch, softmax=True)
                loss = 0.5 * loss_ce + 0.5 * loss_dice

                ###
                total_val_loss += loss.item()
                total_val_dice_loss += loss_dice.item()
                ###

                batch_num = batch_num + 1
            
        avg_val_loss = total_val_loss/batch_num   
        avg_val_loss_dice = total_val_dice_loss/batch_num

        writer.add_scalar('info/avg_val_loss', avg_val_loss, epoch_num)
        writer.add_scalar('info/avg_val_loss_dice', avg_val_loss_dice, epoch_num)
        logging.info('Epoch %d : train_loss : %f, train_loss_dice: %f, val_loss: %f, val_loss_dice: %f' % (epoch_num, avg_train_loss, avg_train_loss_dice,avg_val_loss, avg_val_loss_dice))
        
        val_loss_list.append(avg_val_loss)
        val_loss_dice_list.append(avg_val_loss_dice)
        np.save('val_loss.npy', val_loss_list)
        np.save('val_loss_dice.npy', val_loss_dice_list)


        if epoch_num % 1 == 0:  # log validation images every epoch
            image = image_batch[1, 0:1, :, :]
            image = (image - image.min()) / (image.max() - image.min())
            writer.add_image('val/Image', image, epoch_num)
            outputs = torch.argmax(torch.softmax(outputs, dim=1), dim=1, keepdim=True)
            writer.add_image('val/Prediction', outputs[1, ...] * 50, epoch_num)
            labs = label_batch[1, ...].unsqueeze(0) * 50
            writer.add_image('val/GroundTruth', labs, epoch_num)  
        
        if avg_val_loss_dice < low_val_loss_dice:
            low_val_loss_dice = avg_val_loss_dice
            save_mode_path = os.path.join(snapshot_path, 'epoch_' + str(epoch_num) + '.pth')
            torch.save(model.state_dict(), save_mode_path)
            logging.info("new best model found!!")

        # free per-epoch tensors and return to train mode for the next epoch
        del sampled_batch, image_batch, label_batch
        gc.collect()
        torch.cuda.empty_cache()
        model.train()
        # Periodic checkpointing (every 50 epochs past the halfway point, as in
        # trainer_synapse above) was disabled here in favor of keeping only the
        # best-validation-dice model saved above.
    writer.close()
    return "Training Finished!"
Example #23
def main(args):
    ############    init config ################
    model_name = args.model
    assert model_name in models_dict.keys(), "The requested model does not exist!"
    print('Using model: {}'.format(model_name))

    #################### init logger ###################################
    log_dir = './logs/'+ args.model+'_'+args.note + '/{}'.format(time.strftime('%Y%m%d-%H%M%S'))
    logger = get_logger(log_dir)
    print('RUNDIR: {}'.format(log_dir))
    logger.info('{}-Train'.format(args.model))
    # setting
    setting={k: v for k, v in args._get_kwargs()}
    logger.info(setting)
    args.save_path = log_dir
    args.save_tbx_log = args.save_path + '/tbx_log'
    writer = SummaryWriter(args.save_tbx_log)
    ##################### init device #################################
    if args.manualSeed is None:
        args.manualSeed = random.randint(1, 10000)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    args.use_cuda= args.gpus>0 and torch.cuda.is_available()
    args.device = torch.device('cuda' if args.use_cuda else 'cpu')
    if args.use_cuda:
        torch.cuda.manual_seed(args.manualSeed)
        cudnn.benchmark = True
    ####################### init dataset ###########################################
    train_loader=get_dataloder(args,split_flag="train")
    val_loader=get_dataloder(args,split_flag="valid")
    ######################## init model ############################################
    # model
    logger.info("Model Dict has keys: \n {}".format(models_dict.keys()))
    model=get_models(args)
    if torch.cuda.device_count() > 1 and args.use_cuda:
        logger.info('use: %d gpus', torch.cuda.device_count())
        model = nn.DataParallel(model)
    logger.info('param size = %fMB', calc_parameters_count(model))
    # init loss
    if args.loss=='bce':
        criterion=nn.BCELoss()
    elif args.loss=='bcelog':
        criterion=nn.BCEWithLogitsLoss()
    elif args.loss=="dice":
        criterion=DiceLoss()
    elif args.loss=="softdice":
        criterion=SoftDiceLoss()
    elif args.loss=='bcedice':
        criterion=BCEDiceLoss()
    else:
        criterion=nn.CrossEntropyLoss()
    if args.use_cuda:
        logger.info("load model and criterion to gpu !")
        model=model.to(args.device)
        criterion=criterion.to(args.device)
    # init optimizer
    if args.model_optimizer=="sgd":
        # torch.optim.SGD(parameters, lr=args.lr, weight_decay=args.weight_decay, momentum=args.momentum)
        optimizer=torch.optim.SGD(model.parameters(),lr=args.lr,weight_decay=args.weight_decay,momentum=args.momentum)
    else:
        optimizer=torch.optim.Adam(model.parameters(),args.lr,[args.beta1, args.beta2],
                                   weight_decay=args.weight_decay)

    # init scheduler: CosineAnnealingLR (a StepLR alternative is commented out below)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,args.epoch)
    # scheduler=torch.optim.lr_scheduler.StepLR(optimizer=optimizer,step_size=30,gamma=0.1,last_epoch=-1)
    ############################### check resume #########################
    start_epoch=0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            logger.info("Loading model and optimizer from checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=args.device)
            start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            model.load_state_dict(checkpoint['state_dict'])
            scheduler.load_state_dict(checkpoint['scheduler'])
        else:
            raise FileNotFoundError("No checkpoint found at '{}'".format(args.resume))

    #################################### train and val ########################
    max_value=0
    for epoch in range(start_epoch,args.epoch):
        # lr=adjust_learning_rate(args,optimizer,epoch)
        scheduler.step()
        logger.info('Epoch: %d lr %e', epoch, scheduler.get_lr()[0])
        # train
        mr, ms, mp, mf, mjc, md, macc, mean_loss=train(args, model, criterion, train_loader,
                                                       optimizer, epoch, logger)
        # write
        writer.add_scalar('Train/Loss', mean_loss, epoch)
        writer.add_scalar('Train/mAcc', macc, epoch)
        writer.add_scalar('Train/Recall', mr, epoch)
        writer.add_scalar('Train/Specifi', ms, epoch)
        writer.add_scalar('Train/Precision', mp, epoch)
        writer.add_scalar('Train/F1', mf, epoch)
        writer.add_scalar('Train/Jc', mjc, epoch)
        writer.add_scalar('Train/Dice', md, epoch)

        # val
        vmr, vms, vmp, vmf, vmjc, vmd, vmacc, vmean_loss=val(args, model, criterion, val_loader, epoch, logger)

        writer.add_scalar('Val/Loss', vmean_loss, epoch)
        writer.add_scalar('Val/mAcc', vmacc, epoch)
        writer.add_scalar('Val/Recall', vmr, epoch)
        writer.add_scalar('Val/Specifi', vms, epoch)
        writer.add_scalar('Val/Precision', vmp, epoch)
        writer.add_scalar('Val/F1', vmf, epoch)
        writer.add_scalar('Val/Jc', vmjc, epoch)
        writer.add_scalar('Val/Dice', vmd, epoch)

        is_best = vmjc >= max_value
        max_value = max(max_value, vmjc)
        state = {
                'epoch': epoch,
                'optimizer': optimizer.state_dict(),
                'state_dict': model.state_dict(),
                'scheduler': scheduler.state_dict(),  # the resume block loads this key into the scheduler
            }
        logger.info("epoch:{} best:{} max_value:{}".format(epoch, is_best, max_value))
        # always refresh the rolling checkpoint; additionally snapshot the best model
        torch.save(state, os.path.join(args.save_path, "checkpoint.pth.tar"))
        if is_best:
            torch.save(state, os.path.join(args.save_path, "model_best.pth.tar"))

    writer.close()
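
The resume blocks above load each checkpoint key into the matching object, so the save side must store the corresponding state_dicts. A self-contained sketch of the symmetric pattern (the model, optimizer, and path are placeholders):

import torch
import torch.nn as nn

model = nn.Linear(4, 2)  # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

state = {
    'epoch': 3,
    'optimizer': optimizer.state_dict(),
    'state_dict': model.state_dict(),
    'scheduler': scheduler.state_dict(),  # scheduler state, not the model's
}
torch.save(state, 'checkpoint.pth.tar')

checkpoint = torch.load('checkpoint.pth.tar')
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])
scheduler.load_state_dict(checkpoint['scheduler'])
start_epoch = checkpoint['epoch']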
Example #24
def trainer_synapse(args, model, snapshot_path):
    from datasets.dataset_synapse import Synapse_dataset, LiTS_dataset, KiTS_dataset, RandomGenerator
    logging.basicConfig(filename=snapshot_path + "/log.txt",
                        level=logging.INFO,
                        format='[%(asctime)s.%(msecs)03d] %(message)s',
                        datefmt='%H:%M:%S')
    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
    logging.info(str(args))
    base_lr = args.base_lr
    num_classes = args.num_classes
    batch_size = args.batch_size * args.n_gpu
    # max_iterations = args.max_iterations
    # db_train = Synapse_dataset(base_dir=args.root_path, list_dir=args.list_dir, split="train",
    #                            transform=transforms.Compose(
    #                                [RandomGenerator(output_size=[args.img_size, args.img_size])]))
    if args.dataset == 'LiTS':
        db_train = LiTS_dataset(
            base_dir=args.root_path,
            split='train',
            transform=transforms.Compose(
                [RandomGenerator(output_size=[args.img_size, args.img_size])]))
    elif 'LiTS_tumor' in args.dataset:
        db_train = LiTS_dataset(
            base_dir=args.root_path,
            split='train',
            transform=transforms.Compose(
                [RandomGenerator(output_size=[args.img_size, args.img_size])]),
            tumor_only=True)
    elif 'KiTS_tumor' in args.dataset:
        db_train = KiTS_dataset(
            base_dir=args.root_path,
            split='train',
            transform=transforms.Compose(
                [RandomGenerator(output_size=[args.img_size, args.img_size])]),
            tumor_only=True)
    else:
        raise NotImplementedError('dataset not found!')

    print("The length of train set is: {}".format(len(db_train)))

    def worker_init_fn(worker_id):
        random.seed(args.seed + worker_id)

    trainloader = DataLoader(db_train,
                             batch_size=batch_size,
                             shuffle=True,
                             num_workers=8,
                             pin_memory=True,
                             worker_init_fn=worker_init_fn,
                             drop_last=True)
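    # drop_last=True discards a ragged final batch; BatchNorm layers raise an
    # error in train mode when the batch size is 1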
    if args.n_gpu > 1:
        model = nn.DataParallel(model)
    model.train()
    if args.unfreeze_epoch:
        model.freeze_backbone = True
    ce_loss = CrossEntropyLoss()
    dice_loss = DiceLoss(num_classes)
    optimizer = optim.SGD(model.parameters(),
                          lr=base_lr,
                          momentum=0.9,
                          weight_decay=0.0001)
    writer = SummaryWriter(snapshot_path + '/log')
    iter_num = 0
    max_epoch = args.max_epochs
    max_iterations = args.max_epochs * len(
        trainloader)  # max_epoch = max_iterations // len(trainloader) + 1
    logging.info("{} iterations per epoch. {} max iterations ".format(
        len(trainloader), max_iterations))
    best_performance = 0.0
    iterator = tqdm(range(max_epoch), ncols=70)
    for epoch_num in iterator:
        if epoch_num + 1 == args.unfreeze_epoch:
            base_lr /= 10
            model.freeze_backbone = False
            for g in optimizer.param_groups:
                g['lr'] = base_lr
            logging.info(
                'unfreezing backbone, reducing learning rate to {}'.format(
                    base_lr))
        for i_batch, sampled_batch in enumerate(trainloader):
            image_batch, label_batch = sampled_batch['image'], sampled_batch[
                'label']
            image_batch, label_batch = image_batch.cuda(), label_batch.cuda()
            aux_outputs = None
            if args.model == 'deeplab_resnest':
                outputs, aux_outputs = model(image_batch)
            else:
                outputs = model(image_batch)
            loss_ce = ce_loss(outputs, label_batch[:].long())
            if args.dataset == 'LiTS_tumor':
                loss_dice = dice_loss(outputs,
                                      label_batch,
                                      weight=[1, 1],
                                      softmax=True)
            else:
                loss_dice = dice_loss(outputs, label_batch, softmax=True)
            loss = 0.5 * loss_ce + 0.5 * loss_dice
            if aux_outputs is not None:
                loss_ce_aux = ce_loss(aux_outputs, label_batch[:].long())
                loss_dice_aux = dice_loss(aux_outputs,
                                          label_batch,
                                          softmax=True)
                loss += 0.4 * (0.5 * loss_ce_aux + 0.5 * loss_dice_aux)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            lr_ = base_lr * (1.0 - iter_num / max_iterations)**0.9
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr_

            iter_num = iter_num + 1
            writer.add_scalar('info/lr', lr_, iter_num)
            writer.add_scalar('info/total_loss', loss, iter_num)
            writer.add_scalar('info/loss_ce', loss_ce, iter_num)

            logging.info('epoch %d iteration %d : loss : %f, loss_ce: %f' %
                         (epoch_num, iter_num, loss.item(), loss_ce.item()))

            if iter_num % 20 == 0:
                image = image_batch[1, 0:1, :, :]
                image = (image - image.min()) / (image.max() - image.min())
                writer.add_image('train/Image', image, iter_num)
                outputs = torch.argmax(torch.softmax(outputs, dim=1),
                                       dim=1,
                                       keepdim=True)
                writer.add_image('train/Prediction', outputs[1, ...] * 50,
                                 iter_num)
                labs = label_batch[1, ...].unsqueeze(0) * 50
                writer.add_image('train/GroundTruth', labs, iter_num)

        eval_interval = 5
        if (epoch_num + 1) % eval_interval == 0:
            tumor_dice = inference(args, model, epoch_num + 1)
            model.train()
            if args.model == 'deeplab_resnest':
                model.mode = 'TRAIN'
            writer.add_scalar('info/tumor_dice', tumor_dice, iter_num)
            if tumor_dice > best_performance:
                best_performance = tumor_dice
                save_mode_path = os.path.join(
                    snapshot_path, 'best_model_ep' + str(epoch_num) + '.pth')
                torch.save(model.state_dict(), save_mode_path)
                logging.info("save model to {}".format(save_mode_path))

        save_interval = 50  # int(max_epoch/6)
        if epoch_num > int(
                max_epoch / 2) and (epoch_num + 1) % save_interval == 0:
            save_mode_path = os.path.join(snapshot_path,
                                          'epoch_' + str(epoch_num) + '.pth')
            torch.save(model.state_dict(), save_mode_path)
            logging.info("save model to {}".format(save_mode_path))

        if epoch_num >= max_epoch - 1:
            save_mode_path = os.path.join(snapshot_path,
                                          'epoch_' + str(epoch_num) + '.pth')
            torch.save(model.state_dict(), save_mode_path)
            logging.info("save model to {}".format(save_mode_path))
            if args.pretrain_epoch != -1:
                logdir = snapshot_path[:snapshot_path.rfind('/') + 1]
                with open(logdir + 'log_all.txt', "a") as logfile:
                    logfile.write(
                        f'{args.pretrain_epoch}: {best_performance}\n')
            iterator.close()
            break

    writer.close()
    return "Training Finished!"