Example #1
def main(args):
    global best_acc
    global best_auc

    if not os.path.exists(args.checkpoint):
        os.makedirs(args.checkpoint)

    print("==> Creating model '{}-{}', stacks={}, blocks={}, feats={}".format(
        args.netType, args.pointType, args.nStacks, args.nModules,
        args.nFeats))

    print("=> Models will be saved at: {}".format(args.checkpoint))

    model = models.__dict__[args.netType](num_stacks=args.nStacks,
                                          num_blocks=args.nModules,
                                          num_feats=args.nFeats,
                                          use_se=args.use_se,
                                          use_attention=args.use_attention,
                                          num_classes=68)

    model = torch.nn.DataParallel(model).cuda()

    criterion = torch.nn.MSELoss(reduction='mean').cuda()  # size_average is deprecated; reduction='mean' is the equivalent

    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

    title = args.checkpoint.split('/')[-1] + ' on ' + args.data.split('/')[-1]

    Loader = get_loader(args.data)

    val_loader = torch.utils.data.DataLoader(Loader(args, 'A'),
                                             batch_size=args.val_batch,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> Loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
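            # restore training state: epoch counter, best metric, weights and optimizer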
            args.start_epoch = checkpoint['epoch']
            best_acc = checkpoint['best_acc']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'),
                            title=title,
                            resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names([
            'Epoch', 'LR', 'Train Loss', 'Valid Loss', 'Train Acc', 'Val Acc',
            'AUC'
        ])

    cudnn.benchmark = True
    print('=> Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / (1024. * 1024)))

    if args.evaluation:
        print('=> Evaluation only')
        D = args.data.split('/')[-1]
        save_dir = os.path.join(args.checkpoint, D)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        loss, acc, predictions, auc = validate(val_loader, model, criterion,
                                               args.netType, args.debug,
                                               args.flip)
        save_pred(predictions, checkpoint=save_dir)
        return

    train_loader = torch.utils.data.DataLoader(Loader(args, 'train'),
                                               batch_size=args.train_batch,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    lr = args.lr
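    # main loop: step the LR schedule, train one epoch, validate, log and checkpoint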
    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, lr, args.schedule,
                                  args.gamma)
        print('=> Epoch: %d | LR %.8f' % (epoch + 1, lr))

        train_loss, train_acc = train(train_loader, model, criterion,
                                      optimizer, args.netType, args.debug,
                                      args.flip)
        # do not save predictions in model file
        valid_loss, valid_acc, predictions, valid_auc = validate(
            val_loader, model, criterion, args.netType, args.debug, args.flip)

        logger.append([
            int(epoch + 1), lr, train_loss, valid_loss, train_acc, valid_acc,
            valid_auc
        ])

        is_best = valid_auc >= best_auc
        best_auc = max(valid_auc, best_auc)
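        # persist weights/optimizer and predictions; is_best presumably makes save_checkpoint keep a best-model copy as well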
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'netType': args.netType,
                'state_dict': model.state_dict(),
                'best_acc': best_auc,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            predictions,
            checkpoint=args.checkpoint)

    logger.close()
    logger.plot(['AUC'])
    savefig(os.path.join(args.checkpoint, 'log.eps'))
Example #2
            {
                'epoch': epoch + 1,
                'netType': args.netType,
                'state_dict': model.state_dict(),
                'best_acc': best_auc,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            predictions,
            checkpoint=args.checkpoint,
            filename='checkpoint.pth.tar',
            snapshot=args.snapshot)

    logger.close()
    logger.plot(['AUC'])
    savefig(os.path.join(args.checkpoint, 'log.eps'))


def train(loader, model, criterion, optimizer, netType, debug=False, flip=True):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acces = AverageMeter()

    model.train()
    end = time.time()

    gt_win, pred_win = None, None
    bar = Bar('Training', max=len(loader))
    for i, (inputs, target) in enumerate(loader):
        data_time.update(time.time() - end)
Example #3
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch
    
    experimentID = args.experimentID % (args.arch, args.cv)
    # args.data = args.data%('%s',args.cv)
    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)
    
    if not os.path.isdir(os.path.join(args.checkpoint, experimentID)):
        mkdir_p(os.path.join(args.checkpoint, experimentID))
    
    checkpoint_dir = os.path.join(args.checkpoint, experimentID)
    
    # Data loading code
    train_dataset = EnsembleDataset(args, 'train')
    train_distri = train_dataset.get_label_distri()
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.train_batch,
                                               shuffle=not args.serial_batches,
                                               num_workers=int(args.workers))

    valid_dataset = EnsembleDataset(args, 'valid')
    val_loader = torch.utils.data.DataLoader(valid_dataset,
                                             batch_size=args.test_batch,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    test_dataset = EnsembleDataset(args, 'test')
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                             batch_size=args.test_batch,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    elif args.arch.startswith('resnext'):
        model = models.__dict__[args.arch](
                    baseWidth=args.base_width,
                    cardinality=args.cardinality,
                )
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    if use_cuda:
        model = torch.nn.DataParallel(model).cuda()

    cudnn.benchmark = True
    print('    Total params: %.2fM' % (sum(p.numel() for p in model.parameters())/1000000.0))

    # define loss function (criterion) and optimizer
    print(train_distri)
#    return
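    # focal loss (defined in this repo) built from the training label distribution; a large gamma strongly down-weights easy examples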
    criterion = focalloss(gamma=10, label_distri=train_distri, model_name=args.arch, cuda_a=use_cuda)
#    criterion = nn.CrossEntropyLoss()
#    criterion = nn.KLDivLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
#    optimizer = optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    
    if args.test is False:
      # Resume
      title = args.arch
      if args.resume:
          # Load checkpoint.
          print('==> Resuming from checkpoint..')
          checkpoint_path = os.path.join(checkpoint_dir, args.resume + '.checkpoint.pth.tar')
          print(checkpoint_path)
          assert os.path.isfile(checkpoint_path), 'Error: no checkpoint directory found!'
          checkpoint = torch.load(checkpoint_path)
          best_acc = checkpoint['best_acc']
          start_epoch = checkpoint['epoch']
          model.load_state_dict(checkpoint['state_dict'])
          optimizer.load_state_dict(checkpoint['optimizer'])
          logger = Logger(os.path.join(checkpoint_dir, 'log.txt'), title=title, resume=True)
      else:
          logger = Logger(os.path.join(checkpoint_dir, 'log.txt'), title=title)
          logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])


    if args.test:
        print('\nTest only')
        if len(args.resume) > 0:
          print('load %s-th checkpoint' % args.resume)
          checkpoint_path = os.path.join(checkpoint_dir, args.resume + '.checkpoint.pth.tar')
        else:
          print('load best checkpoint')
          checkpoint_path = os.path.join(checkpoint_dir, 'model_best.pth.tar')
        print(checkpoint_path)
        assert os.path.isfile(checkpoint_path), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(checkpoint_path)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
          
        if not os.path.isdir(args.results):
            mkdir_p(args.results)
        if not os.path.isdir(os.path.join(args.results, experimentID)):
            mkdir_p(os.path.join(args.results, experimentID))
        results_dir = os.path.join(args.results, experimentID)
        test_loss, test_acc, pred_d, real_d = test(test_loader, model, criterion, start_epoch, use_cuda)
        
        with open(os.path.join(results_dir, 'result_detail.csv'), 'w') as f:
            csv_writer = csv.writer(f)
            for i in range(len(real_d)):
                x = np.zeros(len(pred_d[i]))
                x[real_d[i]] = 1
                y = np.exp(pred_d[i])/np.sum(np.exp(pred_d[i]))
                csv_writer.writerow(list(y) + list(x))

        mr = MeasureR(results_dir, test_loss, test_acc)
        mr.output()
        print(' Test Loss:  %.8f, Test Acc:  %.2f' % (test_loss, test_acc))
        return
    
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))
        train_loss, train_acc = train(train_loader, model, criterion, optimizer, epoch, use_cuda)
        test_loss, test_acc, _, _ = test(val_loader, model, criterion, epoch, use_cuda)
        l_loss, l_acc, _, _ = test(test_loader, model, criterion, epoch, use_cuda)
        
        print(train_loss, train_acc, test_acc, l_acc)
        # append logger file
        logger.append([state['lr'], train_loss.cpu(), test_loss.cpu(), train_acc.cpu(), test_acc.cpu()])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        if is_best or epoch % args.checkpoint_saved_n == 0:
          save_checkpoint({
                  'epoch': epoch,
                  'state_dict': model.state_dict(),
                  'acc': test_acc,
                  'best_acc': best_acc,
                  'optimizer' : optimizer.state_dict(),
              }, epoch, is_best, checkpoint=checkpoint_dir)

    logger.close()
    logger.plot()
    savefig(os.path.join(checkpoint_dir, 'log.png'))

    print('Best acc:')
    print(best_acc)
Example #4
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    experimentID = args.experimentID % args.arch
    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    if not os.path.isdir(os.path.join(args.checkpoint, experimentID)):
        mkdir_p(os.path.join(args.checkpoint, experimentID))

    checkpoint_dir = os.path.join(args.checkpoint, experimentID)

    # Data loading code
    train_dataset = XrayDataset(args, 'train')
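    # the label distribution is only consumed by the focal-loss criterion that is commented out further down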
    train_distri = train_dataset.get_label_distri()
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.train_batch,
                                               shuffle=False,
                                               num_workers=int(args.workers))
    valid_dataset = XrayDataset(args, 'valid')
    val_loader = torch.utils.data.DataLoader(valid_dataset,
                                             batch_size=args.test_batch,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    test_dataset = XrayDataset(args, 'test')
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.test_batch,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True)
    #    loders = [(test_loader, 'test')]
    loders = [(train_loader, 'train'), (val_loader, 'valid'),
              (test_loader, 'test')]
    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))

        if args.arch.startswith('resnext'):
            model = models.__dict__[args.arch](
                baseWidth=args.base_width,
                cardinality=args.cardinality,
            )
        else:
            model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()
#    for key, value in model.state_dict().items():
#      print (key, value.shape)
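    # swap each backbone's classification head for a 4-class output layer
    # (in_features follow the stock torchvision models: vgg19_bn 4096, resnet18 512,
    #  resnet50/101/152 and inception_v3 2048, densenet121 1024, densenet161 2208)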
    if args.arch == 'vgg19_bn':
        model.classifier[6] = torch.nn.Linear(4096, 4, bias=True)
    elif args.arch == "inception_v3":
        model.fc = torch.nn.Linear(2048, 4, bias=True)
    elif args.arch == "resnext101_32x8d":
        model.fc = torch.nn.Linear(2048, 4, bias=True)
    elif args.arch == "alexnet":
        model.classifier[6] = torch.nn.Linear(model.classifier[6].in_features,
                                              4,
                                              bias=True)
    elif args.arch == 'resnet18':
        model.fc = torch.nn.Linear(512, 4, bias=True)
    elif args.arch == 'resnet50':
        model.fc = torch.nn.Linear(2048, 4, bias=True)
    elif args.arch == 'resnet101':
        model.fc = torch.nn.Linear(2048, 4, bias=True)
    elif args.arch == 'resnet152':
        model.fc = torch.nn.Linear(2048, 4, bias=True)
    elif args.arch == 'densenet121':
        model.classifier = torch.nn.Linear(1024, 4, bias=True)
    elif args.arch == 'densenet161':
        model.classifier = torch.nn.Linear(2208, 4, bias=True)

    if use_cuda:
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    cudnn.benchmark = True
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    # define loss function (criterion) and optimizer
    #    criterion = focalloss(label_distri = train_distri, model_name = args.arch)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)

    if args.test is False:
        # Resume
        title = args.arch
        if args.resume:
            # Load checkpoint.
            print('==> Resuming from checkpoint..')
            checkpoint_path = os.path.join(checkpoint_dir,
                                           args.resume + '.checkpoint.pth.tar')
            print(checkpoint_path)
            assert os.path.isfile(
                checkpoint_path), 'Error: no checkpoint directory found!'
            checkpoint = torch.load(checkpoint_path)
            best_acc = checkpoint['best_acc']
            start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            logger = Logger(os.path.join(checkpoint_dir, 'log.txt'),
                            title=title,
                            resume=True)
        else:
            logger = Logger(os.path.join(checkpoint_dir, 'log.txt'),
                            title=title)
            logger.set_names([
                'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.',
                'Valid Acc.'
            ])

    if args.test:
        print('\nTest only')
        if len(args.resume) > 0:
            print('load %s-th checkpoint' % args.resume)
            checkpoint_path = os.path.join(checkpoint_dir,
                                           args.resume + '.checkpoint.pth.tar')
        else:
            print('load best checkpoint')
            checkpoint_path = os.path.join(checkpoint_dir,
                                           'model_best.pth.tar')
        print(checkpoint_path)
        assert os.path.isfile(
            checkpoint_path), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(checkpoint_path)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])

        if not os.path.isdir(args.results):
            mkdir_p(args.results)
        if not os.path.isdir(os.path.join(args.results, experimentID)):
            mkdir_p(os.path.join(args.results, experimentID))
        results_dir = os.path.join(args.results, experimentID)
        runtype = []
        for func in loders:
            test_loss, test_acc, pred_d, real_d = test(func[0], model,
                                                       criterion, start_epoch,
                                                       use_cuda)
            with open(
                    os.path.join(
                        results_dir,
                        'result_detail_%s_%s_cv1.csv' % (args.arch, func[1])),
                    'w') as f:
                csv_writer = csv.writer(f)
                for i in range(len(real_d)):
                    x = np.zeros(len(pred_d[i]))
                    x[real_d[i]] = 1
                    #                  y = np.exp(pred_d[i])/np.sum(np.exp(pred_d[i]))
                    csv_writer.writerow(list(np.array(pred_d[i])) + list(x))


#        mr = MeasureR(results_dir, test_loss, test_acc)
#        mr.output()
            print(' Test Loss:  %.8f, Test Acc:  %.4f' % (test_loss, test_acc))
        return

    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        print('\nEpoch: [%d | %d] LR: %f' %
              (epoch + 1, args.epochs, state['lr']))
        train_loss, train_acc = train(train_loader, model, criterion,
                                      optimizer, epoch, use_cuda)
        test_loss, test_acc, _, _ = test(val_loader, model, criterion, epoch,
                                         use_cuda)
        l_loss, l_acc, _, _ = test(test_loader, model, criterion, epoch,
                                   use_cuda)

        print(train_loss, train_acc, test_acc, l_acc)
        # append logger file
        logger.append(
            [state['lr'], train_loss, test_loss, train_acc, test_acc])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
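        # note: the checkpoint (including the best model) is only written on epochs that hit the periodic save interval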
        if epoch % args.checkpoint_saved_n == 0:
            save_checkpoint(
                {
                    'epoch': epoch,
                    'state_dict': model.state_dict(),
                    'acc': test_acc,
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                },
                epoch,
                is_best,
                checkpoint=checkpoint_dir)

    logger.close()
    logger.plot()
    savefig(os.path.join(checkpoint_dir, 'log.eps'))

    print('Best acc:')
    print(best_acc)
Example #5
def main(args):
    # Seed
    torch.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
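    # benchmark=True lets cuDNN auto-tune convolution kernels; note that it can undermine the determinism requested just above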
    np.random.seed(args.seed)

    if args.featurize_mode:
        msg = "To perform featurization, use evaluation mode"
        assert args.evaluate and args.evaluate_video, msg
        msg = (
            f"Until we fully understand the implications of multi-worker caching, we "
            f"should avoid using multiple workers (requested {args.workers})")
        assert args.workers <= 1, msg

    # create checkpoint dir
    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Overload print statement to log to file
    setup_verbose_logging(Path(args.checkpoint))
    logger_name = "train" if not args.evaluate else "eval"
    plog = logging.getLogger(logger_name)

    opts.print_args(args)
    opts.save_args(args, save_folder=args.checkpoint)

    if not args.debug:
        plt.switch_backend("agg")

    # create model
    plog.info(f"==> creating model '{args.arch}', out_dim={args.num_classes}")
    if args.arch == "InceptionI3d":
        model = models.__dict__[args.arch](
            num_classes=args.num_classes,
            spatiotemporal_squeeze=True,
            final_endpoint="Logits",
            name="inception_i3d",
            in_channels=3,
            dropout_keep_prob=0.5,
            num_in_frames=args.num_in_frames,
            include_embds=args.include_embds,
        )
        if args.save_features:
            msg = "Set --include_embds 1 to save_features"
            assert args.include_embds, msg
    elif args.arch == "Pose2Sign":
        model = models.Pose2Sign(num_classes=args.num_classes, )
    else:
        model = models.__dict__[args.arch](num_classes=args.num_classes, )

    device = "cuda" if torch.cuda.is_available() else "cpu"

    # adjust for opts for multi-gpu training. Note that we also apply warmup to the
    # learning rate. Can technically remove this if-statement, but leaving for now
    # to make the change explicit.
    if args.num_gpus > 1:
        num_gpus = torch.cuda.device_count()
        msg = f"Requested {args.num_gpus}, but {num_gpus} were visible"
        assert num_gpus == args.num_gpus, msg
        args.train_batch = args.train_batch * args.num_gpus
        args.test_batch = args.test_batch * args.num_gpus
        device_ids = list(range(args.num_gpus))
        args.lr = args.lr * args.num_gpus
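        # linear scaling rule: batch sizes and learning rate grow with the number of GPUs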
    else:
        device_ids = [0]

    model = torch.nn.DataParallel(model, device_ids=device_ids)
    model = model.to(device)

    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
    )

    # optionally resume from a checkpoint
    tic = time.time()
    title = f"{args.datasetname} - {args.arch}"
    if args.resume:
        if os.path.isfile(args.resume):
            plog.info(f"=> loading checkpoint '{args.resume}'")
            checkpoint = load_checkpoint(args.resume)
            model.load_state_dict(checkpoint["state_dict"])
            optimizer.load_state_dict(checkpoint["optimizer"])
            args.start_epoch = checkpoint["epoch"]
            plog.info(
                f"=> loaded checkpoint '{args.resume}' (epoch {checkpoint['epoch']})"
            )
            logger = Logger(os.path.join(args.checkpoint, "log.txt"),
                            title=title,
                            resume=True)
            del checkpoint
        else:
            plog.info(f"=> no checkpoint found at '{args.resume}'")
            raise ValueError(f"Checkpoint not found at {args.resume}!")
    else:
        logger = Logger(os.path.join(args.checkpoint, "log.txt"), title=title)
        logger_names = ["Epoch", "LR", "train_loss", "val_loss"]
        for p in range(0, args.nloss - 1):
            logger_names.append("train_loss%d" % p)
            logger_names.append("val_loss%d" % p)
        for p in range(args.nperf):
            logger_names.append("train_perf%d" % p)
            logger_names.append("val_perf%d" % p)

        logger.set_names(logger_names)

    if args.pretrained:
        load_checkpoint_flexible(model, optimizer, args, plog)

    param_count = humanize.intword(sum(p.numel() for p in model.parameters()))
    plog.info(f"    Total params: {param_count}")
    duration = time.strftime("%Hh%Mm%Ss", time.gmtime(time.time() - tic))
    plog.info(f"Loaded parameters for model in {duration}")

    mdl = MultiDataLoader(
        train_datasets=args.datasetname,
        val_datasets=args.datasetname,
    )
    train_loader, val_loader, meanstd = mdl._get_loaders(args)
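    # meanstd packs (train_mean, train_std, val_mean, val_std), presumably the per-channel normalisation statistics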

    train_mean = meanstd[0]
    train_std = meanstd[1]
    val_mean = meanstd[2]
    val_std = meanstd[3]

    save_feature_dir = args.checkpoint
    save_fig_dir = Path(args.checkpoint) / "figs"
    if args.featurize_mode:
        save_feature_dir = Path(
            args.checkpoint) / "filtered" / args.featurize_mask
        save_feature_dir.mkdir(exist_ok=True, parents=True)
        save_fig_dir = Path(args.checkpoint) / "figs" / args.featurize_mask
        save_fig_dir.mkdir(exist_ok=True, parents=True)

    # Define criterion
    criterion = torch.nn.CrossEntropyLoss(reduction="mean")
    criterion = criterion.to(device)

    if args.evaluate or args.evaluate_video:
        plog.info("\nEvaluation only")
        loss, acc = do_epoch(
            "val",
            val_loader,
            model,
            criterion,
            num_classes=args.num_classes,
            debug=args.debug,
            checkpoint=args.checkpoint,
            mean=val_mean,
            std=val_std,
            feature_dim=args.feature_dim,
            save_logits=True,
            save_features=args.save_features,
            num_figs=args.num_figs,
            topk=args.topk,
            save_feature_dir=save_feature_dir,
            save_fig_dir=save_fig_dir,
        )
        if args.featurize_mode:
            plog.info(f"Featurizing without metric evaluation")
            return

        # Summarize/save results
        evaluate.evaluate(args, val_loader.dataset, plog)

        logger_epoch = [0, 0]
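        # in evaluation-only mode the same value fills both the train and val columns so the row matches the logger header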
        for p in range(len(loss)):
            logger_epoch.append(float(loss[p].avg))
            logger_epoch.append(float(loss[p].avg))
        for p in range(len(acc)):
            logger_epoch.append(float(acc[p].avg))
            logger_epoch.append(float(acc[p].avg))
        # append logger file
        logger.append(logger_epoch)

        return

    lr = args.lr
    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer,
                                  epoch,
                                  lr,
                                  args.schedule,
                                  args.gamma,
                                  num_gpus=args.num_gpus)
        plog.info("\nEpoch: %d | LR: %.8f" % (epoch + 1, lr))

        # train for one epoch
        train_loss, train_perf = do_epoch(
            "train",
            train_loader,
            model,
            criterion,
            epochno=epoch,
            optimizer=optimizer,
            num_classes=args.num_classes,
            debug=args.debug,
            checkpoint=args.checkpoint,
            mean=train_mean,
            std=train_std,
            feature_dim=args.feature_dim,
            save_logits=False,
            save_features=False,
            num_figs=args.num_figs,
            topk=args.topk,
            save_feature_dir=save_feature_dir,
            save_fig_dir=save_fig_dir,
        )

        # evaluate on validation set
        valid_loss, valid_perf = do_epoch(
            "val",
            val_loader,
            model,
            criterion,
            epochno=epoch,
            num_classes=args.num_classes,
            debug=args.debug,
            checkpoint=args.checkpoint,
            mean=val_mean,
            std=val_std,
            feature_dim=args.feature_dim,
            save_logits=False,
            save_features=False,
            num_figs=args.num_figs,
            topk=args.topk,
            save_feature_dir=save_feature_dir,
            save_fig_dir=save_fig_dir,
        )

        logger_epoch = [epoch + 1, lr]
        for p in range(len(train_loss)):
            logger_epoch.append(float(train_loss[p].avg))
            logger_epoch.append(float(valid_loss[p].avg))
        for p in range(len(train_perf)):
            logger_epoch.append(float(train_perf[p].avg))
            logger_epoch.append(float(valid_perf[p].avg))
        # append logger file
        logger.append(logger_epoch)

        # save checkpoint
        save_checkpoint(
            {
                "epoch": epoch + 1,
                "arch": args.arch,
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
            },
            checkpoint=args.checkpoint,
            snapshot=args.snapshot,
        )

        plt.clf()
        plt.subplot(121)
        logger.plot(["train_loss", "val_loss"])
        plt.subplot(122)
        logger.plot(["train_perf0", "val_perf0"])
        savefig(os.path.join(args.checkpoint, "log.pdf"))

    logger.close()
Example #6
def main(args):
    global best_acc
    global best_auc

    if not os.path.exists(args.checkpoint):
        os.makedirs(args.checkpoint)

    print("==> Creating model '{}-{}', stacks={}, blocks={}, feats={}".format(
        args.netType, args.pointType, args.nStacks, args.nModules, args.nFeats))

    print("=> Models will be saved at: {}".format(args.checkpoint))

    model = models.__dict__[args.netType](
        num_stacks=args.nStacks,
        num_blocks=args.nModules,
        num_feats=args.nFeats,
        use_se=args.use_se,
        use_attention=args.use_attention,
        num_classes=68)

    model = torch.nn.DataParallel(model).cuda()

    criterion = torch.nn.MSELoss(reduction='mean').cuda()  # size_average is deprecated; reduction='mean' is the equivalent

    optimizer = torch.optim.RMSprop(
        model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    title = args.checkpoint.split('/')[-1] + ' on ' + args.data.split('/')[-1]

    Loader = get_loader(args.data)

    val_loader = torch.utils.data.DataLoader(
        Loader(args, 'A'),
        batch_size=args.val_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> Loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_acc = checkpoint['best_acc']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> Loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Epoch', 'LR', 'Train Loss', 'Valid Loss', 'Train Acc', 'Val Acc', 'AUC'])

    cudnn.benchmark = True
    print('=> Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / (1024. * 1024)))

    if args.evaluation:
        print('=> Evaluation only')
        D = args.data.split('/')[-1]
        save_dir = os.path.join(args.checkpoint, D)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        loss, acc, predictions, auc = validate(val_loader, model, criterion, args.netType,
                                                        args.debug, args.flip)
        save_pred(predictions, checkpoint=save_dir)
        return

    train_loader = torch.utils.data.DataLoader(
        Loader(args, 'train'),
        batch_size=args.train_batch,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)
    lr = args.lr
    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, lr, args.schedule, args.gamma)
        print('=> Epoch: %d | LR %.8f' % (epoch + 1, lr))

        train_loss, train_acc = train(train_loader, model, criterion, optimizer, args.netType,
                                      args.debug, args.flip)
        # do not save predictions in model file
        valid_loss, valid_acc, predictions, valid_auc = validate(val_loader, model, criterion, args.netType,
                                                      args.debug, args.flip)

        logger.append([int(epoch + 1), lr, train_loss, valid_loss, train_acc, valid_acc, valid_auc])

        is_best = valid_auc >= best_auc
        best_auc = max(valid_auc, best_auc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'netType': args.netType,
                'state_dict': model.state_dict(),
                'best_acc': best_auc,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            predictions,
            checkpoint=args.checkpoint)

    logger.close()
    logger.plot(['AUC'])
    savefig(os.path.join(args.checkpoint, 'log.eps'))