Example #1
def load_tgt_loaders(exp_dict):
    train_loader = datasets.get_loader(
        exp_dict["tgt_dataset"],
        "train",
        batch_size=exp_dict["tgt_batch_size"])
    val_loader = datasets.get_loader(
        exp_dict["tgt_dataset"], "val", batch_size=exp_dict["tgt_batch_size"])
    name = type(train_loader.dataset).__name__
    n_train = len(train_loader.dataset)
    n_test = len(val_loader.dataset)
    print("Target ({}): train set: {} - val set: {}".format(
        name, n_train, n_test))
    return train_loader, val_loader
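A minimal usage sketch for the example above; the exp_dict keys match what load_tgt_loaders reads, but the dataset name and batch size are placeholder values and must correspond to whatever datasets.get_loader actually supports:

exp_dict = {
    "tgt_dataset": "mnist",   # placeholder dataset name
    "tgt_batch_size": 64,
}
train_loader, val_loader = load_tgt_loaders(exp_dict)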
Example #2
def load_loaders(name, batch):
    train_loader = datasets.get_loader(name, "train", batch_size=batch)
    if train_loader is None:
        print("no such dataset named ", name)
        return
    val_loader = datasets.get_loader(name, "val", batch_size=batch)
    n_train = len(train_loader.dataset)
    n_test = len(val_loader.dataset)
    name = type(train_loader.dataset).__name__

    print("dataset ({}): train set: {} - val set: {}".format(
        name, n_train, n_test))
    return train_loader, val_loader
Example #3
def test_model(model_class, run_func, args, split_idx=0, quiet=False):
    output_dir = args.output_dir  # save the output_dir

    if not quiet:
        print('Model loaded from: %s' % args.pretrain_model)
    model, args = load_model(args.pretrain_model,
                             model_class=model_class,
                             device=args.device)
    args.output_dir = output_dir

    test_data_loader = get_loader(args.data_dir,
                                  data_type='test',
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  split=split_idx,
                                  n_labels=args.n_labels)

    test_stats = run_func(model=model,
                          optim=None,
                          data_loader=test_data_loader,
                          data_type='test',
                          args=args,
                          write_path='%s/test_output.jsonl' % args.output_dir,
                          quiet=quiet)
    if not quiet:
        test_stats.print_stats('Test: ')
Example #4
def load_src_loaders(exp_dict):
    train_loader = datasets.get_loader(exp_dict["src_dataset"],
                                       "train",
                                       batch_size=exp_dict["src_batch_size"],
                                       exp_dict=exp_dict)
    val_loader = datasets.get_loader(exp_dict["src_dataset"],
                                     "val",
                                     batch_size=exp_dict["src_batch_size"],
                                     exp_dict=exp_dict)
    n_train = len(train_loader.dataset)
    n_test = len(val_loader.dataset)
    name = type(train_loader.dataset).__name__

    print("Source ({}): train set: {} - val set: {}".format(
        name, n_train, n_test))
    return train_loader, val_loader
Example #5
def get_loader(config):
    # todo
    config = config["dataset"]
    DATASET = config["name"]
    BATCH_SIZE = config["batch_size"]
    workers = config["workers"]
    trainloader, valloader = datasets.get_loader(setname=DATASET,
                                                 batch_size=BATCH_SIZE,
                                                 shuffle=True,
                                                 num_workers=workers)
    return trainloader, valloader
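A minimal sketch of the nested config dict this wrapper expects; the values are placeholders and the dataset name has to be one that datasets.get_loader recognizes:

config = {
    "dataset": {
        "name": "cifar10",   # placeholder dataset name
        "batch_size": 128,
        "workers": 4,
    }
}
trainloader, valloader = get_loader(config)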
Example #6
def get_tgt_loader_supervised(exp_dict):
    train_loader = datasets.get_loader(
        exp_dict["tgt_dataset"],
        "train_supervised",
        batch_size=exp_dict["tgt_batch_size_supervised"],
        exp_dict=exp_dict)
    test_loader = datasets.get_loader(
        exp_dict["tgt_dataset"],
        "test_supervised",
        batch_size=exp_dict["tgt_batch_size_supervised"],
        exp_dict=exp_dict)
    name = type(train_loader.dataset).__name__
    n_train = len(train_loader.dataset)
    n_test = len(test_loader.dataset)
    print(
        "Target Supervised ({}): train set: {} ---------- test set: {}".format(
            name, n_train, n_test))
    return train_loader, test_loader
Example #7
def main(config):
    # For fast training.
    # cudnn.benchmark = True

    # Create directories if not exist.
    if not os.path.exists(config.log_dir):
        os.makedirs(config.log_dir)
    if not os.path.exists(config.model_save_dir):
        os.makedirs(config.model_save_dir)
    if not os.path.exists(config.sample_dir):
        os.makedirs(config.sample_dir)
    if not os.path.exists(config.result_dir):
        os.makedirs(config.result_dir)

    # Data loader.
    celeba_loader = None
    rafd_loader = None

    if config.dataset in ['CelebA', 'Both']:
        celeba_loader = get_loader(config.celeba_image_dir, config.attr_path,
                                   config.selected_attrs,
                                   config.celeba_crop_size, config.image_size,
                                   config.batch_size, 'CelebA', config.mode,
                                   config.num_workers)
    if config.dataset in ['RaFD', 'Both']:
        rafd_loader = get_loader(config.rafd_image_dir, None, None,
                                 config.rafd_crop_size, config.image_size,
                                 config.batch_size, 'RaFD', config.mode,
                                 config.num_workers)

    # Trainer for training and testing StarGAN.
    trainer = Trainer(celeba_loader, rafd_loader, config)

    if config.mode == 'train':
        if config.dataset in ['CelebA', 'RaFD']:
            trainer.train()
        elif config.dataset in ['Both']:
            trainer.train_multi()
    elif config.mode == 'test':
        if config.dataset in ['CelebA', 'RaFD']:
            trainer.test()
        elif config.dataset in ['Both']:
            trainer.test_multi()
Example #8
def get_data_examples(args, data_type, n_examples, shuffle_data=False):
    """Returns examples from data"""
    data_loader = get_loader(args.data_dir,
                             data_type=data_type,
                             batch_size=n_examples,
                             shuffle=shuffle_data,
                             split=0,
                             n_labels=args.n_labels)
    examples = next(iter(data_loader))
    return examples
Example #9
    def __init__(self, config):
        self.config = config
        self.ckpt_dir = config.ckpt_dir
        if not os.path.exists(self.ckpt_dir):
            os.makedirs(self.ckpt_dir)

        self.save_config(config)
        self.timer = Timer()

        self.writer = SummaryWriter(log_dir=config.ckpt_dir)

        self.lr = config.lr
        self.datasets, self.loaders = get_loader(config)
        self.max_iters = config.max_iters
        if self.max_iters is not None:
            self.epochs = self.max_iters // len(self.loaders['train'])
        else:
            self.epochs = config.epochs
        self.start_epoch = 0
        self.num_classes = self.datasets['train'].n_classes

        self.scores = ScoreMeter(self.num_classes)

        self.model = ModelSelector[config.model](
            in_channels=config.in_channels,
            num_classes=self.num_classes,
            **config.model_params[config.model])

        if config.distributed:
            self.model = nn.DataParallel(self.model)
            patch_replication_callback(self.model)

        self.model = self.model.cuda()

        self.criterion = LossSelector[config.loss](
            **config.loss_params[config.loss])
        self.optimizer = optim.SGD(self.model.parameters(),
                                   lr=self.lr,
                                   momentum=0.9,
                                   weight_decay=4e-5)
        self.lr_decay = optim.lr_scheduler.CosineAnnealingLR(
            self.optimizer, self.max_iters)

        self.best_miou = float('-inf')

        if config.resume:
            logger.info('***Resume from checkpoint***')
            state = torch.load(os.path.join(self.ckpt_dir, 'ckpt.pt'))
            self.model.load_state_dict(state['model'])
            self.start_epoch = state['epoch']
            self.best_miou = state['best_miou']
            self.optimizer.load_state_dict(state['optim'])
            self.lr_decay.load_state_dict(state['lr_decay'])
            self.lr_decay.last_epoch = self.start_epoch
Example #10
def main():
    
    print('*'*40)
    print(args.checkpoint)
    print(args.load_model)
    print(args.val_set)
    print('-'*40)
    
    ## dynamically adjust hyper-parameters for ResNets according to base_width
    if args.base_width != 64 and 'sat' in args.loss:
        factor = 64. / args.base_width
        args.sat_alpha = args.sat_alpha**(1. / factor)
        args.sat_es = int(args.sat_es * factor)
        print("Adaptive parameters adjustment: alpha = {:.3f}, Es = {:d}".format(args.sat_alpha, args.sat_es))

    global best_prec1
    global best_auc_1, best_auc_2, best_auc_3, best_model_1, best_model_2, best_model_3, best_epoch_1, best_epoch_2, best_epoch_3

    # Check the save_dir exists or not
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
        os.makedirs(os.path.join(args.save_dir, 'train'))
        os.makedirs(os.path.join(args.save_dir, 'val'))
        os.makedirs(os.path.join(args.save_dir, 'test'))

    # prepare dataset
    val_loader, num_classes, val_targets, pass_idx = get_loader(args)
    
    model = get_model(args, num_classes, base_width=args.base_width)
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model.cuda()
    checkpoint = torch.load(args.checkpoint)
    model.load_state_dict(checkpoint[args.load_model])

    torch.cuda.manual_seed(args.seed)
    cudnn.benchmark = True

    criterion = get_loss(args, labels=val_targets, num_classes=num_classes,
                         train_len=0, val_len=len(val_targets), test_len=0,
                         pass_idx=pass_idx)
    optimizer = get_optimizer(model, args)
    scheduler = get_scheduler(optimizer, args)
    
    train_timeline = Timeline()
    val_timeline = Timeline()
    test_timeline = Timeline()
    
    validate(val_loader, model, 0, val_timeline, args.dataset, criterion=criterion, crop=args.crop, last=True)
    return
Example #11
def build_dataloader(config, num_workers, distributed):
    import torch.utils.data as data
    import torch.utils.data.distributed
    import datasets

    dataset = build_dataset(config)

    loader = get_loader(
        dataset=dataset,
        dataset_config=config,
        num_dataloader_workers=num_workers,
        pin_memory=False,  ### Questionable
    )
    return loader
Example #12
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-model_path', required=True)
    parser.add_argument('-model_type', required=True, choices=['gcn', 'proto'])
    args = parser.parse_args()

    model_type = args.model_type

    if model_type == 'gcn':
        model_class = GCN
    elif model_type == 'proto':
        model_class = ProtoNet
    else:
        assert False

    print('loading model from: %s' % args.model_path)

    model, args = load_model(args.model_path, model_class, None)
    test_data_loader = get_loader(
        args.data_dir, data_type='test', batch_size=BATCH_SIZE, shuffle=True,
        split=0)

    s_rho, p_rho = compute_r(model_type, model, args, test_data_loader)
Example #13
def main(args=None, quiet=False, splits=None, abs_output_dir=False):
    if args is None:
        args = get_args()

    if args.pretrain_model is not None:
        test_model(model_class=ProtoNet,
                   run_func=run_func,
                   args=args,
                   quiet=quiet)
        exit()

    # If the point clouds should be initialized from data points, sample random
    # molecules from the validation set
    if args.init_method == 'data':
        train_data_loader = get_loader(args.data_dir,
                                       data_type='val',
                                       batch_size=args.n_pc,
                                       shuffle=True,
                                       split=0,
                                       n_labels=args.n_labels)
        pc_data = next(iter(train_data_loader))
        args.pc_data = pc_data

    if args.plot_pc:
        init_plot_tracker(args)

    train_results = train_model(model_class=ProtoNet,
                                run_func=run_func,
                                args=args,
                                quiet=quiet,
                                splits=splits,
                                abs_output_dir=abs_output_dir)

    if args.plot_pc:
        args.plot_tracker.plot_and_save(args)

    return train_results
Example #14
opts, _ = parser.parse_options()

opts_str = parser.make_opts_string(opts, verbose=True)

if opts.no_viz:
    viz = None
else:
    viz = Visualizer(port=opts.vizport,
                     hostname=opts.vizaddr,
                     is_remote=opts.viz_is_remote)

model = create_model(opts, viz)
t_dataset, v_dataset = create_dataset(opts)
t_loader = get_loader(data=t_dataset,
                      batch_size=opts.batch_size,
                      shuffle=not opts.no_shuffle,
                      num_workers=opts.num_workers)

model.init_viz(opts_str)

for n in range(model.epoch + 1, opts.n_epochs + 1):
    print('Epoch {}'.format(n))
    iters_p_epoch = len(t_loader)
    curr_iter = 0
    for example in t_loader:
        model.set_input(example)
        model.optimize_parameters()
        model.iter += 1
        curr_iter += 1
        if curr_iter % opts.print_freq == 0 \
           or curr_iter == iters_p_epoch:
Example #15
def train_gan(cfg, logger, vis):
    # Setup seeds
    torch.manual_seed(cfg.get("seed", 1337))
    torch.cuda.manual_seed(cfg.get("seed", 1337))
    np.random.seed(cfg.get("seed", 1337))
    random.seed(cfg.get("seed", 1337))

    # Setup Dataloader
    data_loader = get_loader(cfg["data"]["dataset"])
    data_path = cfg["data"]["path"]

    t_loader = data_loader(
        data_path,
        split=cfg["data"]["train_split"],
        patch_size=cfg['data']['patch_size'],
        augmentation=cfg['data']['aug_data']
    )

    train_loader = DataLoader(
        t_loader,
        batch_size=cfg["batch_size"],
        num_workers=cfg["n_workers"],
        shuffle=True,
    )

    # custom weights initialization called on netG and netD
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            m.weight.data.normal_(0.0, 0.02)
            m.bias.data.fill_(0)
        elif classname.find('BatchNorm') != -1:
            m.weight.data.normal_(1.0, 0.02)
            m.bias.data.fill_(0)

    ndf = cfg['ndf']
    ngf = cfg['ngf']
    nc = 3

    netD_cls = get_model(cfg['netd'])
    netG_cls = get_model(cfg['netg'])

    netD = netD_cls(nc, cfg['output_nc'], ndf).to(device)
    netG = netG_cls(cfg['input_nc'], cfg['output_nc'], ngf).to(device)

    netG.apply(weights_init)
    netD.apply(weights_init)
    logger.info(netD)
    logger.info(netG)

    ###########   LOSS & OPTIMIZER   ##########
    criterion = torch.nn.BCELoss()
    criterionL1 = torch.nn.L1Loss()

    optimizerD = torch.optim.Adam(netD.parameters(), lr=cfg['optimizer']['lr'],
                                  betas=(cfg['optimizer']['beta1'], 0.999))
    optimizerG = torch.optim.Adam(netG.parameters(), lr=cfg['optimizer']['lr'],
                                  betas=(cfg['optimizer']['beta1'], 0.999))

    ###########   GLOBAL VARIABLES   ###########
    input_nc = cfg['input_nc']
    output_nc = cfg['output_nc']
    fineSize = cfg['data']['patch_size']

    real_A = Variable(torch.FloatTensor(cfg['batch_size'], input_nc, fineSize, fineSize), requires_grad=False).to(
        device)
    real_B = Variable(torch.FloatTensor(cfg['batch_size'], output_nc, fineSize, fineSize), requires_grad=False).to(
        device)
    label = Variable(torch.FloatTensor(cfg['batch_size']), requires_grad=False).to(device)

    real_label = 1
    fake_label = 0

    ########### Training   ###########
    netD.train()
    netG.train()
    for epoch in range(1, cfg['max_iters'] + 1):
        for i, image in enumerate(train_loader):
            ########### fDx ###########
            netD.zero_grad()
            if cfg['direction'] == 'OtoB':
                imgA = image[1]
                imgB = image[0]
            else:
                imgA = image[0]
                imgB = image[1]

            # train with real data
            real_A.data.resize_(imgA.size()).copy_(imgA)
            real_B.data.resize_(imgB.size()).copy_(imgB)
            real_AB = torch.cat((real_A, real_B), 1)

            output = netD(real_AB)
            label.data.resize_(output.size())
            label.data.fill_(real_label)
            errD_real = criterion(output, label)
            errD_real.backward()

            # train with fake
            fake_B = netG(real_A)
            label.data.fill_(fake_label)

            fake_AB = torch.cat((real_A, fake_B), 1)
            output = netD(fake_AB.detach())
            errD_fake = criterion(output, label)
            errD_fake.backward()

            errD = (errD_fake + errD_real) / 2
            optimizerD.step()

            ########### fGx ###########
            netG.zero_grad()
            label.data.fill_(real_label)
            output = netD(fake_AB)
            errGAN = criterion(output, label)
            errL1 = criterionL1(fake_B, real_B)
            errG = errGAN + cfg['lamb'] * errL1

            errG.backward()
            optimizerG.step()

            ########### Logging ##########
            if i % 50 == 0:
                logger.info('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f Loss_L1: %.4f'
                            % (epoch, cfg['max_iters'], i, len(train_loader),
                               errD.item(), errGAN.item(), errL1.item()))

            if cfg['vis']['use'] and (i % 50 == 0):
                fake_B = netG(real_A)
                vis.images(real_A.data.cpu().numpy(), win='real_A')
                vis.images(fake_B.detach().cpu().numpy(), win='fake_B')
                vis.images(real_B.data.cpu().numpy(), win='real_B')
                vis.plot('error_d', errD.item())
                vis.plot('error_g', errGAN.item())
                vis.plot('error_L1', errL1.item())

        if epoch % 20 == 0:
            save_image(
                name='train',
                img_lists=[real_A.data.cpu(), fake_B.data.cpu(), real_B.data.cpu()],
                path='%s/fake_samples_epoch_%03d.png' % (cfg['checkpoint_dir'], epoch),
                step=epoch,
                batch_size=cfg['batch_size']
            )
            save_checkpoints(model=netG,
                             step=epoch,
                             optim=optimizerG,
                             model_dir=cfg['checkpoint_dir'],
                             name='{}_step_{}'.format(cfg['netg'] + cfg['data']['dataset'], epoch))
            save_checkpoints(model=netD,
                             step=epoch,
                             optim=optimizerD,
                             model_dir=cfg['checkpoint_dir'],
                             name='{}_step_{}'.format(cfg['netd'] + cfg['data']['dataset'], epoch))
Example #16
def train(cfg):
    
    # Setup seeds
    torch.manual_seed(cfg.get('seed', 1337))
    torch.cuda.manual_seed(cfg.get('seed', 1337))
    np.random.seed(cfg.get('seed', 1337))
    random.seed(cfg.get('seed', 1337))

    # Setup device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup Augmentations
    augmentations = cfg['training'].get('augmentations', None)
    data_aug = get_composed_augmentations(augmentations)

    # Setup Dataloader
    data_loader = get_loader(cfg['data']['dataset'])
    data_path = cfg['data']['path']

    t_loader = data_loader(
        data_path,
        is_transform=True,
        split=cfg['data']['train_split'],
        #img_size=(cfg['data']['img_rows'], cfg['data']['img_cols']),
        augmentations=data_aug)

    v_loader = data_loader(
        data_path,
        is_transform=True,
        split=cfg['data']['val_split'],
        #img_size=(cfg['data']['img_rows'], cfg['data']['img_cols']),
        )

    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(t_loader,
                                  batch_size=cfg['training']['batch_size'], 
                                  num_workers=cfg['training']['n_workers'], 
                                  shuffle=True)

    valloader = data.DataLoader(v_loader, 
                                batch_size=cfg['training']['batch_size'], 
                                num_workers=cfg['training']['n_workers'])

    # Setup Metrics
    running_metrics_val = runningScore(n_classes)

    # Setup Model
    model = get_model(cfg['model'], n_classes).to(device)

    model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))

    # Setup optimizer, lr_scheduler and loss function
    optimizer_cls = get_optimizer(cfg)
    optimizer_params = {k:v for k, v in cfg['training']['optimizer'].items() 
                        if k != 'name'}

    optimizer = optimizer_cls(model.parameters(), **optimizer_params)
    scheduler = get_scheduler(optimizer, cfg['training']['lr_schedule'])

    loss_fn = get_loss_function(cfg)

    start_iter = 0
    if cfg['training']['resume'] is not None:
        if os.path.isfile(cfg['training']['resume']):
 
            checkpoint = torch.load(cfg['training']['resume'])
            model.load_state_dict(checkpoint["model_state"])
            optimizer.load_state_dict(checkpoint["optimizer_state"])
            scheduler.load_state_dict(checkpoint["scheduler_state"])
            start_iter = checkpoint["epoch"]
            print("=====>",
                "Loaded checkpoint '{}' (iter {})".format(
                    cfg['training']['resume'], checkpoint["epoch"]
                )
            )
        else:
            print("=====>","No checkpoint found at '{}'".format(cfg['training']['resume']))

    val_loss_meter = averageMeter()
    time_meter = averageMeter()

    best_iou = -100.0
    i = start_iter
    flag = True

    while i <= cfg['training']['train_iters'] and flag:
        for (images, labels) in trainloader:
            i += 1
            start_ts = time.time()
            scheduler.step()
            model.train()
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)

            loss = loss_fn(input=outputs, target=labels)

            loss.backward()
            optimizer.step()
            
            time_meter.update(time.time() - start_ts)

            if (i + 1) % cfg['training']['print_interval'] == 0:
                fmt_str = "Iter [{:d}/{:d}]  Loss: {:.4f}  Time/Image: {:.4f}"
                print_str = fmt_str.format(i + 1,
                                           cfg['training']['train_iters'], 
                                           loss.item(),
                                           time_meter.avg / cfg['training']['batch_size'])

                print(print_str)
                time_meter.reset()

            if (i + 1) % cfg['training']['val_interval'] == 0 or \
               (i + 1) == cfg['training']['train_iters']:
                model.eval()
                with torch.no_grad():
                    for i_val, (images_val, labels_val) in tqdm(enumerate(valloader)):
                        images_val = images_val.to(device)
                        labels_val = labels_val.to(device)

                        outputs = model(images_val)
                        val_loss = loss_fn(input=outputs, target=labels_val)

                        pred = outputs.data.max(1)[1].cpu().numpy()
                        gt = labels_val.data.cpu().numpy()


                        running_metrics_val.update(gt, pred)
                        val_loss_meter.update(val_loss.item())


                print("Iter %d Loss: %.4f" % (i + 1, val_loss_meter.avg))

                score, class_iou = running_metrics_val.get_scores()
                for k, v in score.items():
                    print(k,':',v)

                for k, v in class_iou.items():
                    print('{}: {}'.format(k, v))

                val_loss_meter.reset()
                running_metrics_val.reset()

                if score["Mean IoU : \t"] >= best_iou:
                    best_iou = score["Mean IoU : \t"]
                    state = {
                        "epoch": i + 1,
                        "model_state": model.state_dict(),
                        "optimizer_state": optimizer.state_dict(),
                        "scheduler_state": scheduler.state_dict(),
                        "best_iou": best_iou,
                    }
                    save_path = os.path.join('./checkpoint',
                                             "{}_{}_best_model.pkl".format(
                                                 cfg['model']['arch'],
                                                 cfg['data']['dataset']))
                    print("saving···")
                    torch.save(state, save_path)

            if (i + 1) == cfg['training']['train_iters']:
                flag = False
                break
Example #17
def main():
    ## dynamically adjust hyper-parameters for ResNets according to base_width
    if args.base_width != 64 and 'sat' in args.loss:
        factor = 64. / args.base_width
        args.sat_alpha = args.sat_alpha**(1. / factor)
        args.sat_es = int(args.sat_es * factor)
        print("Adaptive parameters adjustment: alpha = {:.3f}, Es = {:d}".format(args.sat_alpha, args.sat_es))

    print(args)
    global best_prec1, best_auc

    # Check the save_dir exists or not
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
        os.makedirs(os.path.join(args.save_dir, 'train'))
        os.makedirs(os.path.join(args.save_dir, 'val'))
        os.makedirs(os.path.join(args.save_dir, 'test'))

    # prepare dataset
    if args.dataset == 'nexperia':
        train_loader, num_classes, targets = get_loader(args)
    else:
        train_loader, val_loaders, test_loader, num_classes, targets = get_loader(args)
    
    model = get_model(args, num_classes, base_width=args.base_width)
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model.cuda()

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            if args.dataset=='nexperia_split':
                best_auc = checkpoint['best_auc']
            else:
                best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    torch.cuda.manual_seed(args.seed)
    cudnn.benchmark = True

    criterion = get_loss(args, labels=targets, num_classes=num_classes)
    optimizer = get_optimizer(model, args)
    scheduler = get_scheduler(optimizer, args)
    
    train_timeline = Timeline()
    val_timeline = Timeline()
    test_timeline = Timeline()

    if args.evaluate:
        validate(test_loader, model, args.crop)
        return

    print("*" * 40)
    start = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        scheduler.step(epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, train_timeline, args.sat_es, args.dataset, args.mod, args.crop)
        print("*" * 40)
        
        if args.dataset!='nexperia':
            # evaluate on validation sets
            prec1 = 0
            if args.dataset=='nexperia_split':
                print('val:')
                val_auc = validate(
                    val_loaders, model, epoch, val_timeline, args.dataset, state='val', criterion=criterion, crop=args.crop)
                print("*" * 40)
                
                print('test:')
                test_auc = validate(
                    test_loader, model, epoch, test_timeline, args.dataset, state='test', criterion=criterion, crop=args.crop)
            else:
                for name, val_loader in zip(args.val_sets, val_loaders):
                    print(name +":", end="\t")
                    prec1 = validate(val_loader, model, args.crop)
            print("*" * 40)
            
            if args.dataset=='nexperia_split':
                # remember best auc and save checkpoint
                is_best = val_auc > best_auc
                best_auc = max(val_auc, best_auc)
                if args.save_freq > 0 and (epoch + 1) % args.save_freq == 0:
                    filename = 'checkpoint_{}.tar'.format(epoch + 1)
                else:
                    filename = None
                save_checkpoint(args.save_dir, {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_auc': best_auc,
                }, is_best, filename=filename)

            else:
                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                if args.save_freq > 0 and (epoch + 1) % args.save_freq == 0:
                    filename = 'checkpoint_{}.tar'.format(epoch + 1)
                else:
                    filename = None
                save_checkpoint(args.save_dir, {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                }, is_best, filename=filename)
                
        if hasattr(criterion, 'outputs'):
            criterion.weights[epoch] = criterion.outputs[criterion.true_labels.index]
            criterion.clean_weights[epoch] = criterion.outputs[criterion.clean_labels.index]
        else:
            criterion.weights[epoch] = criterion.soft_labels[criterion.true_labels.index]
            criterion.clean_weights[epoch] = criterion.soft_labels[criterion.clean_labels.index]
                            
    if args.dataset!='nexperia':
        # evaluate latest checkpoint
        print("Test acc of latest checkpoint:", end='\t')
        validate(test_loader, model, epoch, test_timeline, args.dataset, last=True, crop=args.crop)
        print("*" * 40)
        
        # evaluate best checkpoint
        if args.dataset=='nexperia_split':
            checkpoint = torch.load(os.path.join(args.save_dir, 'checkpoint_best.tar'))
            print("Best validation auc ({}th epoch): {:.2f}%".format(checkpoint['epoch'], best_auc*100.))
            model.load_state_dict(checkpoint['state_dict'])
            print("Test acc of best checkpoint:", end='\t')
            validate(test_loader, model, checkpoint['epoch'], test_timeline, args.dataset, last=True, crop=args.crop)
            print("*" * 40)
        else:
            if len(val_loaders) > 0:
                checkpoint = torch.load(os.path.join(args.save_dir, 'checkpoint_best.tar'))
                print("Best validation acc ({}th epoch): {:.2f}%".format(checkpoint['epoch'], best_prec1))
                model.load_state_dict(checkpoint['state_dict'])
                print("Test acc of best checkpoint:", end='\t')
                validate(test_loader, model, last=True, crop=args.crop)
                print("*" * 40)

    time_elapsed = time.time() - start
    print('It takes {:.0f}m {:.0f}s to train.'.format(time_elapsed // 60, time_elapsed % 60))
    
    # save best result
    filename = 'train_results.tar'
    save_checkpoint(args.save_dir, {
        'num_epochs': args.epochs,
        'state_dict': model.state_dict(),
    }, is_best=True, filename=filename)

    # save soft label
    if hasattr(criterion, 'soft_labels'):
        out_fname = os.path.join(args.save_dir, 'updated_soft_labels.npy')
        np.save(out_fname, criterion.soft_labels.cpu().numpy())
        print("Updated soft labels is saved to {}".format(out_fname))
        
    # save weights change of 106 images
    if hasattr(criterion, 'weights'):
        out_fname = os.path.join(args.save_dir, 'weights_change.npy')
        np.save(out_fname, criterion.weights.cpu().numpy())
        print("weights change is saved to {}".format(out_fname))
        
    if hasattr(criterion, 'clean_weights'):
        out_fname = os.path.join(args.save_dir, 'clean_weights_change.npy')
        np.save(out_fname, criterion.clean_weights.cpu().numpy())
        print("clean weights change is saved to {}".format(out_fname))

    # save timelines
    train_acc_class = torch.cat(train_timeline.acc_class, dim=0)
    train_loss_class = torch.cat(train_timeline.loss_class, dim=0)
    train_acc_bi_class = torch.cat(train_timeline.acc_bi_class, dim=0)
    train_loss_bi_class = torch.cat(train_timeline.loss_bi_class, dim=0)
    train_me_class = torch.cat(train_timeline.me_class, dim=0)
    train_me_bi_class = torch.cat(train_timeline.me_bi_class, dim=0)
        
    val_acc_class = torch.cat(val_timeline.acc_class, dim=0)
    val_loss_class = torch.cat(val_timeline.loss_class, dim=0)
    val_acc_bi_class = torch.cat(val_timeline.acc_bi_class, dim=0)
    val_loss_bi_class = torch.cat(val_timeline.loss_bi_class, dim=0)
    val_me_class = torch.cat(val_timeline.me_class, dim=0)
    val_me_bi_class = torch.cat(val_timeline.me_bi_class, dim=0)
    
    test_acc_class = torch.cat(test_timeline.acc_class, dim=0)
    test_loss_class = torch.cat(test_timeline.loss_class, dim=0)
    test_acc_bi_class = torch.cat(test_timeline.acc_bi_class, dim=0)
    test_loss_bi_class = torch.cat(test_timeline.loss_bi_class, dim=0)
    test_me_class = torch.cat(test_timeline.me_class, dim=0)
    test_me_bi_class = torch.cat(test_timeline.me_bi_class, dim=0)
    
    np.save(os.path.join(args.save_dir, 'train', 'loss.npy'), train_timeline.loss)
    np.save(os.path.join(args.save_dir, 'train', 'acc.npy'), train_timeline.acc)
    np.save(os.path.join(args.save_dir, 'train', 'loss_bi.npy'), train_timeline.loss_bi)
    np.save(os.path.join(args.save_dir, 'train', 'acc_bi.npy'), train_timeline.acc_bi)
    np.save(os.path.join(args.save_dir, 'train', 'loss_class.npy'), train_loss_class)
    np.save(os.path.join(args.save_dir, 'train', 'acc_class.npy'), train_acc_class)
    np.save(os.path.join(args.save_dir, 'train', 'loss_bi_class.npy'), train_loss_bi_class)
    np.save(os.path.join(args.save_dir, 'train', 'acc_bi_class.npy'), train_acc_bi_class)
    np.save(os.path.join(args.save_dir, 'train', 'margin_error.npy'), train_timeline.margin_error)
    np.save(os.path.join(args.save_dir, 'train', 'margin_error_bi.npy'), train_timeline.margin_error_bi)
    np.save(os.path.join(args.save_dir, 'train', 'margin_error_class.npy'), train_me_class)
    np.save(os.path.join(args.save_dir, 'train', 'margin_error_bi_class.npy'), train_me_bi_class)
    np.save(os.path.join(args.save_dir, 'train', 'auc.npy'), train_timeline.auc)
    np.save(os.path.join(args.save_dir, 'train', 'fpr_991.npy'), train_timeline.fpr_991)
    np.save(os.path.join(args.save_dir, 'train', 'fpr_993.npy'), train_timeline.fpr_993)
    np.save(os.path.join(args.save_dir, 'train', 'fpr_995.npy'), train_timeline.fpr_995)
    np.save(os.path.join(args.save_dir, 'train', 'fpr_997.npy'), train_timeline.fpr_997)
    np.save(os.path.join(args.save_dir, 'train', 'fpr_999.npy'), train_timeline.fpr_999)
    np.save(os.path.join(args.save_dir, 'train', 'fpr_1.npy'), train_timeline.fpr_1)
    print("other training details are saved to {}".format(os.path.join(args.save_dir, 'train')))

    np.save(os.path.join(args.save_dir, 'val', 'loss.npy'), val_timeline.loss)
    np.save(os.path.join(args.save_dir, 'val', 'acc.npy'), val_timeline.acc)
    np.save(os.path.join(args.save_dir, 'val', 'loss_bi.npy'), val_timeline.loss_bi)
    np.save(os.path.join(args.save_dir, 'val', 'acc_bi.npy'), val_timeline.acc_bi)
    np.save(os.path.join(args.save_dir, 'val', 'loss_class.npy'), val_loss_class)
    np.save(os.path.join(args.save_dir, 'val', 'acc_class.npy'), val_acc_class)
    np.save(os.path.join(args.save_dir, 'val', 'loss_bi_class.npy'), val_loss_bi_class)
    np.save(os.path.join(args.save_dir, 'val', 'acc_bi_class.npy'), val_acc_bi_class)
    np.save(os.path.join(args.save_dir, 'val', 'margin_error.npy'), val_timeline.margin_error)
    np.save(os.path.join(args.save_dir, 'val', 'margin_error_bi.npy'), val_timeline.margin_error_bi)
    np.save(os.path.join(args.save_dir, 'val', 'margin_error_class.npy'), val_me_class)
    np.save(os.path.join(args.save_dir, 'val', 'margin_error_bi_class.npy'), val_me_bi_class)
    np.save(os.path.join(args.save_dir, 'val', 'auc.npy'), val_timeline.auc)
    np.save(os.path.join(args.save_dir, 'val', 'fpr_991.npy'), val_timeline.fpr_991)
    np.save(os.path.join(args.save_dir, 'val', 'fpr_993.npy'), val_timeline.fpr_993)
    np.save(os.path.join(args.save_dir, 'val', 'fpr_995.npy'), val_timeline.fpr_995)
    np.save(os.path.join(args.save_dir, 'val', 'fpr_997.npy'), val_timeline.fpr_997)
    np.save(os.path.join(args.save_dir, 'val', 'fpr_999.npy'), val_timeline.fpr_999)
    np.save(os.path.join(args.save_dir, 'val', 'fpr_1.npy'), val_timeline.fpr_1)
    print("other validating details are saved to {}".format(os.path.join(args.save_dir, 'val')))

    np.save(os.path.join(args.save_dir, 'test', 'loss.npy'), test_timeline.loss)
    np.save(os.path.join(args.save_dir, 'test', 'acc.npy'), test_timeline.acc)
    np.save(os.path.join(args.save_dir, 'test', 'loss_bi.npy'), test_timeline.loss_bi)
    np.save(os.path.join(args.save_dir, 'test', 'acc_bi.npy'), test_timeline.acc_bi)
    np.save(os.path.join(args.save_dir, 'test', 'loss_class.npy'), test_loss_class)
    np.save(os.path.join(args.save_dir, 'test', 'acc_class.npy'), test_acc_class)
    np.save(os.path.join(args.save_dir, 'test', 'loss_bi_class.npy'), test_loss_bi_class)
    np.save(os.path.join(args.save_dir, 'test', 'acc_bi_class.npy'), test_acc_bi_class)
    np.save(os.path.join(args.save_dir, 'test', 'margin_error.npy'), test_timeline.margin_error)
    np.save(os.path.join(args.save_dir, 'test', 'margin_error_bi.npy'), test_timeline.margin_error_bi)
    np.save(os.path.join(args.save_dir, 'test', 'margin_error_class.npy'), test_me_class)
    np.save(os.path.join(args.save_dir, 'test', 'margin_error_bi_class.npy'), test_me_bi_class)
    np.save(os.path.join(args.save_dir, 'test', 'auc.npy'), test_timeline.auc)
    np.save(os.path.join(args.save_dir, 'test', 'fpr_991.npy'), test_timeline.fpr_991)
    np.save(os.path.join(args.save_dir, 'test', 'fpr_993.npy'), test_timeline.fpr_993)
    np.save(os.path.join(args.save_dir, 'test', 'fpr_995.npy'), test_timeline.fpr_995)
    np.save(os.path.join(args.save_dir, 'test', 'fpr_997.npy'), test_timeline.fpr_997)
    np.save(os.path.join(args.save_dir, 'test', 'fpr_999.npy'), test_timeline.fpr_999)
    np.save(os.path.join(args.save_dir, 'test', 'fpr_1.npy'), test_timeline.fpr_1)
    print("other testing details are saved to {}".format(os.path.join(args.save_dir, 'test')))
Example #18
def main():
    ## dynamically adjust hyper-parameters for ResNets according to base_width
    if args.base_width != 64 and 'sat' in args.loss:
        factor = 64. / args.base_width
        args.sat_alpha = args.sat_alpha**(1. / factor)
        args.sat_es = int(args.sat_es * factor)
        print(
            "Adaptive parameters adjustment: alpha = {:.3f}, Es = {:d}".format(
                args.sat_alpha, args.sat_es))

    print(args)
    global best_prec1

    # Check the save_dir exists or not
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # prepare dataset
    train_loader, val_loaders, test_loader, num_classes, targets, clean_targets = get_loader(
        args)
    if args.is_tpu:
        device = xm.xla_device()
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = get_model(args, num_classes, base_width=args.base_width)
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    if args.is_tpu:
        model = model.to(device)
    else:
        model = model.cuda()

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    torch.manual_seed(args.seed)
    cudnn.benchmark = True

    criterion = get_loss(args,
                         device=device,
                         labels=targets,
                         num_classes=num_classes)
    optimizer = get_optimizer(model, args)
    scheduler = get_scheduler(optimizer, args)

    if args.evaluate:
        validate(test_loader, model, device)
        return

    print("*" * 40)
    for epoch in range(args.start_epoch, args.epochs):
        scheduler.step(epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, device, args)
        print("*" * 40)

        # evaluate on validation sets
        prec1 = 0
        for name, val_loader in zip(args.val_sets, val_loaders):
            print(name + ":", end="\t")
            prec1 = validate(val_loader, model, device, epoch)
        print("*" * 40)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        if args.save_freq > 0 and (epoch + 1) % args.save_freq == 0:
            filename = 'checkpoint_{}.tar'.format(epoch + 1)
        else:
            filename = None
        save_checkpoint(args.save_dir, {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
        },
                        is_best,
                        filename=filename)

    # evaluate latest checkpoint
    print("Test acc of latest checkpoint:", end='\t')
    validate(test_loader, model, device)
    print("*" * 40)

    # evaluate best checkpoint
    if len(val_loaders) > 0:
        checkpoint = torch.load(
            os.path.join(args.save_dir, 'checkpoint_best.tar'))
        print("Best validation acc ({}th epoch): {}".format(
            checkpoint['epoch'], best_prec1))
        model.load_state_dict(checkpoint['state_dict'])
        print("Test acc of best checkpoint:", end='\t')
        validate(test_loader, model, device)
        print("*" * 40)

    # save soft label
    if hasattr(criterion, 'soft_labels'):
        out_fname = os.path.join(args.save_dir, 'updated_soft_labels.npy')
        np.save(out_fname, criterion.soft_labels.cpu().numpy())
        print("Updated soft labels is saved to {}".format(out_fname))

    # save noise targets
    out_fname = os.path.join(args.save_dir, 'noisy_labels.npy')
    np.save(out_fname, targets)
    print("Noisy labels saved to {}".format(out_fname))

    # save clean targets
    out_fname = os.path.join(args.save_dir, 'clean_labels.npy')
    np.save(out_fname, clean_targets)
    print("Clean labels saved to {}".format(out_fname))
Example #19
def trainval(exp_dict, savedir_base, reset=False):
    # bookkeeping
    # ---------------

    # get experiment directory
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)

    if reset:
        # delete and backup experiment
        hc.delete_experiment(savedir, backup_flag=True)

    # create folder and save the experiment dictionary
    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, 'exp_dict.json'), exp_dict)
    pprint.pprint(exp_dict)
    print('Experiment saved in %s' % savedir)

    # Dataset
    # -----------

    # train loader
    train_loader = datasets.get_loader(dataset_name=exp_dict['dataset'],
                                       datadir=savedir_base,
                                       split='train')

    # val loader
    val_loader = datasets.get_loader(dataset_name=exp_dict['dataset'],
                                     datadir=savedir_base,
                                     split='val')

    # Model
    # -----------
    model = models.get_model(model_name=exp_dict['model'])

    # Checkpoint
    # -----------
    model_path = os.path.join(savedir, 'model.pth')
    score_list_path = os.path.join(savedir, 'score_list.pkl')

    if os.path.exists(score_list_path):
        # resume experiment
        model.set_state_dict(hu.torch_load(model_path))
        score_list = hu.load_pkl(score_list_path)
        s_epoch = score_list[-1]['epoch'] + 1
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    # Train & Val
    # ------------
    print('Starting experiment at epoch %d' % (s_epoch))

    for e in range(s_epoch, 10):
        score_dict = {}

        # Train the model
        train_dict = model.train_on_loader(train_loader)

        # Validate the model
        val_dict = model.val_on_loader(val_loader)

        # Get metrics
        score_dict['train_loss'] = train_dict['train_loss']
        score_dict['val_acc'] = val_dict['val_acc']
        score_dict['epoch'] = e

        # Add to score_list and save checkpoint
        score_list += [score_dict]

        # Report & Save
        score_df = pd.DataFrame(score_list)
        print(score_df.tail())
        hu.torch_save(model_path, model.get_state_dict())
        hu.save_pkl(score_list_path, score_list)
        print('Checkpoint Saved: %s' % savedir)

    print('experiment completed')
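A minimal call sketch for the example above, assuming placeholder dataset and model names that datasets.get_loader and models.get_model would accept:

exp_dict = {"dataset": "mnist", "model": "mlp"}   # placeholder names
trainval(exp_dict, savedir_base="./results", reset=False)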
Example #20
def train_weighted_descent(D, dataQ0, dataP, wP, opt):
    n_samples, n_features = dataQ0.shape
    device = dataQ0.device

    # Lagrange multiplier for Augmented Lagrangian
    lambda_aug = torch.tensor([opt.lambda_aug_init],
                              requires_grad=True,
                              device=device)

    # MMD distance
    mmd = MMD_RFF(num_features=n_features, num_outputs=300).to(device)

    # Train
    print('Start training')

    if opt.plot_online:
        fig, ax = plt.subplots()
        ax.set_xlim((-1.1, 1.1))
        ax.set_ylim((-1.1, 1.1))
        scat = ax.scatter([], [], facecolor='r')

    # Save stuff
    wQ = torch.ones((len(dataQ0), 1), device=device)
    collQ, collW, coll_mmd = [], [], []

    dataQ = dataQ0.clone()
    for t in range(opt.T + 1):
        tic = time.time()

        # Snapshot of current state
        with torch.no_grad():
            mmd_PQ = mmd(dataP,
                         dataQ,
                         weights_X=wP if wP is not None else None,
                         weights_Y=wQ)

        coll_mmd.append(mmd_PQ)
        collQ.append(dataQ.detach().cpu().numpy())  # snapshot of current state
        collW.append(
            wQ.view(-1).detach().cpu().numpy())  # snapshot of current weights

        # (1) Update D network
        optimizerD = torch.optim.Adam(D.parameters(),
                                      lr=opt.lrD,
                                      weight_decay=opt.wdecay,
                                      amsgrad=True)
        D.train()
        for i in range(opt.n_c_startup if t == 0 else opt.n_c):
            optimizerD.zero_grad()

            x_p, w_p = minibatch((dataP, wP), opt.batchSizeD)
            x_q, w_q = minibatch((dataQ, wQ), opt.batchSizeD)

            loss, Ep_f, Eq_f, normgrad_f2_q = D_forward_weights(
                D, x_p, w_p, x_q, w_q, lambda_aug, opt.alpha, opt.rho)
            loss.backward()
            optimizerD.step()

            manual_sgd_(lambda_aug, opt.rho)

        tocD = time.time() - tic

        # (2) Update Q distribution (with birth/death)
        D.eval()
        with torch.no_grad():
            x_q, w_q = minibatch((dataQ, wQ))
            f_q = D(x_q)
            m_f = (w_q * f_q).mean()

        new_x_q, log_wQ = [], []
        for x_q, w_q in get_loader((dataQ, wQ), batch_size=opt.batchSizeQ):
            x_q = x_q.detach().requires_grad_(True)
            sum_f_q = D(x_q).sum()
            grad_x_q = grad(outputs=sum_f_q, inputs=x_q, create_graph=True)[0]

            # Update particles
            with torch.no_grad():
                # Move particles
                x_q.data += opt.lrQ * grad_x_q
                f_q = D(x_q)
                dw_q = f_q - m_f

                log_wQ.append((w_q / n_samples).log() + opt.tau * dw_q)
                new_x_q.append(x_q)

        # Update weights and dataQ
        wQ = F.softmax(torch.cat(log_wQ), dim=0) * n_samples
        dataQ = torch.cat(new_x_q)

        # (3) print some stuff
        if t % opt.log_every == 0:
            x_p, w_p = minibatch((dataP, wP))
            x_q, w_q = minibatch((dataQ, wQ))

            loss, Ep_f, Eq_f, normgrad_f2_q = D_forward_weights(
                D, x_p, w_p, x_q, w_q, lambda_aug, opt.alpha, opt.rho)
            with torch.no_grad():
                SobDist_lasti = Ep_f.item() - Eq_f.item()
                mmd_dist = mmd(dataP,
                               dataQ,
                               weights_X=wP if wP is not None else None,
                               weights_Y=wQ)

            print('[{:5d}/{}] SobolevDist={:.4f}\t mmd={:.5f} Eq_normgrad_f2[stepQ]={:.3f} Ep_f={:.2f} Eq_f={:.2f} lambda_aug={:.4f}'.\
                format(t, opt.T, SobDist_lasti, mmd_dist, normgrad_f2_q.mean().item(), Ep_f.item(), Eq_f.item(), lambda_aug.item()))

            if opt.plot_online:
                scat.set_offsets(dataQ.detach().cpu().numpy())
                rgba_colors = np.zeros((wQ.shape[0], 4))
                rgba_colors[:, 0] = 1.0
                rgba_colors[:, 3] = wQ.view(
                    -1).detach().cpu().numpy() / wQ.max().item()
                scat.set_color(rgba_colors)
                plt.pause(0.01)

    return dataQ, wQ, collQ, collW, coll_mmd
Example #21
def train_model(model_class,
                run_func,
                args,
                quiet=False,
                splits=None,
                abs_output_dir=False):
    output_dir = args.output_dir

    val_stat = args.val_stat
    # Keeps track of certain stats for all the data splits
    all_stats = {
        'val_%s' % val_stat: [],
        'test_%s' % val_stat: [],
        'best_epoch': [],
        'train_last': [],
        'train_best': [],
        'nce': [],
    }

    # Iterate over splits
    splits_iter = splits if splits is not None else range(args.n_splits)
    # Iterates through each split of the data
    for split_idx in splits_iter:
        # print('Training split idx: %d' % split_idx)

        # Creates the output directory for the run of the current split
        if not abs_output_dir:
            args.output_dir = output_dir + '/run_%d' % split_idx
        args.model_dir = args.output_dir + '/models'
        if not os.path.exists(args.output_dir):
            os.makedirs(args.output_dir)
        if not os.path.exists(args.model_dir):
            os.makedirs(args.model_dir)
        write_args(args)

        # Create model and optimizer
        model = model_class(args)
        model.to(args.device)

        if args.separate_lr:
            optim = model.get_model_optim()
        else:
            optim = torch.optim.Adam(model.parameters(), lr=args.lr)

        if split_idx == 0:
            # Print the number of parameters
            num_params = get_num_params(model)
            if not quiet:
                print('Initialized model with %d params' % num_params)

        # Load the train, val, test data
        dataset_loaders = {}
        for data_type in ['train', 'val', 'test']:
            dataset_loaders[data_type] = get_loader(
                args.data_dir,
                data_type=data_type,
                batch_size=args.batch_size,
                shuffle=data_type == 'train',
                split=split_idx,
                n_labels=args.n_labels)

        # Keeps track of stats across all the epochs
        train_m, val_m = StatsManager(), StatsManager()

        # Tensorboard logging, only for the first run split
        if args.log_tb and split_idx == 0:
            log_dir = output_dir + '/logs'
            tb_writer = SummaryWriter(log_dir, max_queue=1, flush_secs=60)
            log_tensorboard(tb_writer, {'params': num_params}, '', 0)
        else:
            args.log_tb = False

        # Training loop
        args.latest_train_stat = 0
        args.latest_val_stat = 0  # Keeps track of the latest relevant stat
        patience_idx = 0
        for epoch_idx in range(args.n_epochs):
            args.epoch = epoch_idx
            train_stats = run_func(model=model,
                                   optim=optim,
                                   data_loader=dataset_loaders['train'],
                                   data_type='train',
                                   args=args,
                                   write_path=None,
                                   quiet=quiet)
            should_write = epoch_idx % args.write_every == 0
            val_stats = run_func(
                model=model,
                optim=None,
                data_loader=dataset_loaders['val'],
                data_type='val',
                args=args,
                write_path='%s/val_output_%d.jsonl' %
                (args.output_dir, epoch_idx) if should_write else None,
                quiet=quiet)

            if not quiet:
                train_stats.print_stats('Train %d: ' % epoch_idx)
                val_stats.print_stats('Val   %d: ' % epoch_idx)

            if args.log_tb:
                log_tensorboard(tb_writer, train_stats.get_stats(), 'train',
                                epoch_idx)
                log_tensorboard(tb_writer, val_stats.get_stats(), 'val',
                                epoch_idx)

            train_stats.add_stat('epoch', epoch_idx)
            val_stats.add_stat('epoch', epoch_idx)

            train_m.add_stats(train_stats.get_stats())
            val_m.add_stats(val_stats.get_stats())

            if val_stats.get_stats()[val_stat] == min(val_m.stats[val_stat]):
                save_model(model,
                           args,
                           args.model_dir,
                           epoch_idx,
                           should_print=not quiet)
                patience_idx = 0
            else:
                patience_idx += 1
                if args.patience != -1 and patience_idx >= args.patience:
                    print(
                        'Validation error has not improved in %d, stopping at epoch: %d'
                        % (args.patience, args.epoch))
                    break

            # Keep track of the latest epoch stats
            args.latest_train_stat = train_stats.get_stats()[val_stat]
            args.latest_val_stat = val_stats.get_stats()[val_stat]

        # Load and save the best model
        best_epoch = val_m.get_best_epoch_for_stat(args.val_stat)
        best_model_path = '%s/model_%d' % (args.model_dir, best_epoch)
        model, _ = load_model(best_model_path,
                              model_class=model_class,
                              device=args.device)
        if not quiet:
            print('Loading model from %s' % best_model_path)

        save_model(model, args, args.model_dir, 'best', should_print=not quiet)

        # Test model
        test_stats = run_func(model=model,
                              optim=None,
                              data_loader=dataset_loaders['test'],
                              data_type='test',
                              args=args,
                              write_path='%s/test_output.jsonl' %
                              args.output_dir,
                              quiet=quiet)
        if not quiet:
            test_stats.print_stats('Test: ')

        if args.log_tb:
            log_tensorboard(tb_writer, test_stats.get_stats(), 'test', 0)
            tb_writer.close()

        # Write test output to a summary file
        with open('%s/summary.txt' % args.output_dir, 'w+') as summary_file:
            for k, v in test_stats.get_stats().items():
                summary_file.write('%s: %.3f\n' % (k, v))

        # Aggregate relevant stats
        all_stats['val_%s' % val_stat].append(min(val_m.stats[val_stat]))
        all_stats['test_%s' % val_stat].append(
            test_stats.get_stats()[val_stat])
        all_stats['best_epoch'].append(best_epoch)

        all_stats['train_last'].append(train_m.stats[val_stat][-1])
        all_stats['train_best'].append(train_m.stats[val_stat][best_epoch])

        if args.nce_coef > 0:
            all_stats['nce'].append(train_m.stats['nce_reg'][best_epoch])

    # Write the stats aggregated across all splits
    with open('%s/summary.txt' % (output_dir), 'w+') as summary_file:
        summary_file.write('Num epochs trained: %d\n' % args.epoch)
        for name, stats_arr in all_stats.items():
            if stats_arr == []:
                continue
            stats_arr = np.array(stats_arr)
            stats_mean = np.mean(stats_arr)
            stats_std = np.std(stats_arr)
            summary_file.write('%s: %s, mean: %.3f, std: %.3f\n' %
                               (name, str(stats_arr), stats_mean, stats_std))

    all_val_stats = np.array(all_stats['val_%s' % val_stat])
    all_test_stats = np.array(all_stats['test_%s' % val_stat])

    val_mean, val_std = np.mean(all_val_stats), np.std(all_val_stats)
    test_mean, test_std = np.mean(all_test_stats), np.std(all_test_stats)

    train_last = np.mean(np.array(all_stats['train_last']))
    train_best = np.mean(np.array(all_stats['train_best']))

    if args.nce_coef > 0:
        nce_loss = np.mean(np.array(all_stats['nce']))
    else:
        nce_loss = 0

    # Return stats
    return (val_mean, val_std), (test_mean, test_std), (train_last,
                                                        train_best), nce_loss
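
The epoch loop above calls a log_tensorboard helper that is not included in this excerpt. Below is a minimal sketch, assuming only the signature implied by the call sites (writer, flat dict of scalar stats, split name, epoch index); it is not the repository's actual implementation.

from torch.utils.tensorboard import SummaryWriter


def log_tensorboard(tb_writer, stats, split, epoch_idx):
    # Write each scalar stat under a "<split>/<name>" tag at the given epoch.
    for name, value in stats.items():
        tb_writer.add_scalar('%s/%s' % (split, name), value, epoch_idx)


# Usage sketch (tb_writer would be created once before the epoch loop):
# tb_writer = SummaryWriter(log_dir=args.output_dir)
# log_tensorboard(tb_writer, train_stats.get_stats(), 'train', epoch_idx)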
Example #22
0
def train(cfg, logger, vis):
    # Setup seeds
    torch.manual_seed(cfg.get("seed", 1337))
    torch.cuda.manual_seed(cfg.get("seed", 1337))
    np.random.seed(cfg.get("seed", 1337))
    random.seed(cfg.get("seed", 1337))

    # Setup Dataloader
    data_loader = get_loader(cfg["data"]["dataset"])
    data_path = cfg["data"]["path"]

    t_loader = data_loader(
        data_path,
        split=cfg["data"]["train_split"],
        patch_size=cfg['data']['patch_size'],
        augmentation=cfg['data']['aug_data']
    )

    v_loader = data_loader(
        data_path,
        split=cfg["data"]["val_split"],
    )

    trainloader = DataLoader(
        t_loader,
        batch_size=cfg["batch_size"],
        num_workers=cfg["n_workers"],
        shuffle=True,
    )

    valloader = DataLoader(
        v_loader, batch_size=cfg["batch_size"], num_workers=cfg["n_workers"]
    )

    # Setup model, optimizer and loss function
    model_cls = get_model(cfg['model'])
    model = model_cls(cfg).to(device)

    optimizer_cls = get_optimizer(cfg)
    optimizer_params = {k: v for k, v in cfg["optimizer"].items() if k != "name"}
    optimizer = optimizer_cls(model.parameters(), **optimizer_params)

    scheduler = MultiStepLR(optimizer, milestones=[15000, 17500], gamma=0.1)

    crit = get_critical(cfg['critical'])().to(device)
    ssim = SSIM().to(device)

    step = 0

    if cfg['resume'] is not None:
        # Resuming from a checkpoint is left unimplemented in this example;
        # see the checkpoint-helper sketch after this example.
        pass

    while step < cfg['max_iters']:
        scheduler.step()
        model.train()

        if cfg['model'] == 'rescan':
            O, B, prediction = inference_rescan(model=model, optimizer=optimizer, dataloader=trainloader,
                                                critical=crit, ssim=ssim,
                                                step=step, vis=vis)
        if cfg['model'] == 'did_mdn':
            O, B, prediction, label = inference_didmdn(model=model, optimizer=optimizer, dataloader=trainloader,
                                                       critical=crit, ssim=ssim,
                                                       step=step, vis=vis)

        if step % 10 == 0:
            model.eval()
            if cfg['model'] == 'rescan':
                O, B, prediction_v = inference_rescan(model=model, optimizer=optimizer, dataloader=valloader,
                                                      critical=crit, ssim=ssim,
                                                      step=step, vis=vis)
            if cfg['model'] == 'did_mdn':
                O, B, prediction, label = inference_didmdn(model=model, optimizer=optimizer,
                                                           dataloader=valloader,
                                                           critical=crit, ssim=ssim,
                                                           step=step, vis=vis)

        if step % int(cfg['save_steps'] / 16) == 0:
            save_checkpoints(model, step, optimizer, cfg['checkpoint_dir'], 'latest')
        if step % int(cfg['save_steps'] / 2) == 0:
            save_image('train', [O.cpu(), prediction.cpu(), B.cpu()], cfg['checkpoint_dir'], step, cfg['batch_size'])
            if step % 10 == 0:
                save_image('val', [O.cpu(), prediction.cpu(), B.cpu()], cfg['checkpoint_dir'], step, cfg['batch_size'])
            logger.info('save image as step_%d' % step)
        if step % cfg['save_steps'] == 0:
            save_checkpoints(model=model,
                             step=step,
                             optim=optimizer,
                             model_dir=cfg['checkpoint_dir'],
                             name='{}_step_{}'.format(cfg['model'] + cfg['data']['dataset'], step))
            logger.info('save model as step_%d' % step)
        step += 1
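
This training loop relies on save_checkpoints (and the later test() example on load_checkpoints), neither of which appears in this excerpt, and the cfg['resume'] branch is left as a stub. The helpers below are only a sketch matching the argument order seen at the call sites, not the original code.

import os
import torch


def save_checkpoints(model, step, optim, model_dir, name):
    # Persist model/optimizer state together with the current step.
    os.makedirs(model_dir, exist_ok=True)
    torch.save({'model': model.state_dict(),
                'optim': optim.state_dict(),
                'step': step},
               os.path.join(model_dir, '%s.pth' % name))


def load_checkpoints(model, optim, model_dir, name='latest'):
    # Restore the states saved above and return the step to resume from.
    ckpt = torch.load(os.path.join(model_dir, '%s.pth' % name),
                      map_location='cpu')
    model.load_state_dict(ckpt['model'])
    optim.load_state_dict(ckpt['optim'])
    return model, ckpt['step']

With helpers like these, the resume stub above could become, for example, _, step = load_checkpoints(model, optimizer, cfg['checkpoint_dir'], name='latest').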
Example #23
0
def train(args):

    # Setup Dataloader
    data_loader = get_loader('doc3dwc')
    data_path = args.data_path
    t_loader = data_loader(data_path,
                           is_transform=True,
                           img_size=(args.img_rows, args.img_cols),
                           augmentations=True)
    v_loader = data_loader(data_path,
                           is_transform=True,
                           split='val',
                           img_size=(args.img_rows, args.img_cols))

    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(t_loader,
                                  batch_size=args.batch_size,
                                  num_workers=8,
                                  shuffle=True)
    valloader = data.DataLoader(v_loader,
                                batch_size=args.batch_size,
                                num_workers=8)

    # Setup Model
    model = get_model(args.arch, n_classes, in_channels=3)
    model = torch.nn.DataParallel(model,
                                  device_ids=range(torch.cuda.device_count()))
    model.cuda()

    # Activation
    htan = nn.Hardtanh(0, 1.0)

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.l_rate,
                                 weight_decay=5e-4,
                                 amsgrad=True)

    # LR Scheduler
    sched = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                       mode='min',
                                                       factor=0.5,
                                                       patience=5,
                                                       verbose=True)

    # Losses
    MSE = nn.MSELoss()
    loss_fn = nn.L1Loss()
    gloss = grad_loss.Gradloss(window_size=5, padding=2)

    epoch_start = 0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['model_state'])
            optimizer.load_state_dict(checkpoint['optimizer_state'])
            print("Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            epoch_start = checkpoint['epoch']
        else:
            print("No checkpoint found at '{}'".format(args.resume))

    # Log file:
    if not os.path.exists(args.logdir):
        os.makedirs(args.logdir)
    # activation_dataset_lossparams_augmentations_trainstart
    experiment_name = 'htan_doc3d_l1grad_bghsaugk_scratch'
    log_file_name = os.path.join(args.logdir, experiment_name + '.txt')
    if os.path.isfile(log_file_name):
        log_file = open(log_file_name, 'a')
    else:
        log_file = open(log_file_name, 'w+')

    log_file.write('\n---------------  ' + experiment_name +
                   '  ---------------\n')
    log_file.close()

    # Setup tensorboard for visualization
    if args.tboard:
        # save logs in runs/<experiment_name>
        writer = SummaryWriter(comment=experiment_name)

    best_val_mse = 99999.0
    global_step = 0

    for epoch in range(epoch_start, args.n_epoch):
        avg_loss = 0.0
        avg_l1loss = 0.0
        avg_gloss = 0.0
        train_mse = 0.0
        model.train()

        for i, (images, labels) in enumerate(trainloader):
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            pred = htan(outputs)
            g_loss = gloss(pred, labels)
            l1loss = loss_fn(pred, labels)
            loss = l1loss  # +(0.2*g_loss)
            avg_l1loss += float(l1loss)
            avg_gloss += float(g_loss)
            avg_loss += float(loss)
            train_mse += float(MSE(pred, labels).item())

            loss.backward()
            optimizer.step()
            global_step += 1

            if (i + 1) % 50 == 0:
                print("Epoch[%d/%d] Batch [%d/%d] Loss: %.4f" %
                      (epoch + 1, args.n_epoch, i + 1, len(trainloader),
                       avg_loss / 50.0))
                avg_loss = 0.0

            if args.tboard and (i + 1) % 20 == 0:
                show_wc_tnsboard(global_step, writer, images, labels, pred, 8,
                                 'Train Inputs', 'Train WCs',
                                 'Train Pred. WCs')
                writer.add_scalar('WC: L1 Loss/train', avg_l1loss / (i + 1),
                                  global_step)
                writer.add_scalar('WC: Grad Loss/train', avg_gloss / (i + 1),
                                  global_step)

        train_mse = train_mse / len(trainloader)
        avg_l1loss = avg_l1loss / len(trainloader)
        avg_gloss = avg_gloss / len(trainloader)
        print("Training L1:%4f" % (avg_l1loss))
        print("Training MSE:'{}'".format(train_mse))
        train_losses = [avg_l1loss, train_mse, avg_gloss]

        lrate = get_lr(optimizer)
        write_log_file(log_file_name, train_losses, epoch + 1, lrate,
                       'Train')

        model.eval()
        val_loss = 0.0
        val_mse = 0.0
        # val_bg = 0.0
        # val_fg = 0.0
        val_gloss = 0.0
        # val_dloss = 0.0
        for i_val, (images_val, labels_val) in tqdm(enumerate(valloader)):
            with torch.no_grad():
                images_val = images_val.to(device)
                labels_val = labels_val.to(device)

                outputs = model(images_val)
                pred_val = htan(outputs)
                g_loss = gloss(pred_val, labels_val).cpu()
                pred_val = pred_val.cpu()
                labels_val = labels_val.cpu()
                loss = loss_fn(pred_val, labels_val)
                val_loss += float(loss)
                val_mse += float(MSE(pred_val, labels_val))
                val_gloss += float(g_loss)

        if args.tboard:
            show_wc_tnsboard(epoch + 1, writer, images_val, labels_val,
                             pred_val, 8, 'Val Inputs', 'Val WCs',
                             'Val Pred. WCs')
            writer.add_scalar('WC: L1 Loss/val', val_loss, epoch + 1)
            writer.add_scalar('WC: Grad Loss/val', val_gloss, epoch + 1)

        val_loss = val_loss / len(valloader)
        val_mse = val_mse / len(valloader)
        val_gloss = val_gloss / len(valloader)
        print("val loss at epoch {}:: {}".format(epoch + 1, val_loss))
        print("val MSE: {}".format(val_mse))

        val_losses = [val_loss, val_mse, val_gloss]
        write_log_file(log_file_name, val_losses, epoch + 1, lrate, 'Val')

        # reduce learning rate
        sched.step(val_mse)

        if val_mse < best_val_mse:
            best_val_mse = val_mse
            state = {
                'epoch': epoch + 1,
                'model_state': model.state_dict(),
                'optimizer_state': optimizer.state_dict(),
            }
            torch.save(
                state, args.logdir + "{}_{}_{}_{}_{}_best_model.pkl".format(
                    args.arch, epoch + 1, val_mse, train_mse, experiment_name))

        if (epoch + 1) % 10 == 0:
            state = {
                'epoch': epoch + 1,
                'model_state': model.state_dict(),
                'optimizer_state': optimizer.state_dict(),
            }
            torch.save(
                state, args.logdir + "{}_{}_{}_{}_{}_model.pkl".format(
                    args.arch, epoch + 1, val_mse, train_mse, experiment_name))
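
The loop above also uses get_lr and write_log_file, which are not part of this excerpt. The versions below are assumptions inferred from how they are called, not the repository's actual implementations.

def get_lr(optimizer):
    # Current learning rate of the first parameter group.
    return optimizer.param_groups[0]['lr']


def write_log_file(log_name, losses, epoch, lrate, phase):
    # Append one line per call: phase, epoch, lr and the list of losses.
    with open(log_name, 'a') as f:
        f.write('{} epoch {} lr {:.6f} losses {}\n'.format(
            phase, epoch, lrate, ' '.join('%.6f' % l for l in losses)))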
Example #24
0
def train(args):

    # Setup Dataloader
    data_loader = get_loader('doc3dbmnic')
    data_path = args.data_path
    t_loader = data_loader(data_path,
                           is_transform=True,
                           img_size=(args.img_rows, args.img_cols))
    v_loader = data_loader(data_path,
                           is_transform=True,
                           split='val',
                           img_size=(args.img_rows, args.img_cols))

    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(t_loader,
                                  batch_size=args.batch_size,
                                  num_workers=8,
                                  shuffle=True)
    valloader = data.DataLoader(v_loader,
                                batch_size=args.batch_size,
                                num_workers=8)

    # Setup Model
    model = get_model(args.arch, n_classes, in_channels=3)
    model = torch.nn.DataParallel(model,
                                  device_ids=range(torch.cuda.device_count()))
    model.cuda()

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.l_rate,
                                 weight_decay=5e-4,
                                 amsgrad=True)

    # LR Scheduler
    sched = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                       mode='min',
                                                       factor=0.5,
                                                       patience=3,
                                                       verbose=True)

    # Losses
    MSE = nn.MSELoss()
    loss_fn = nn.L1Loss()
    reconst_loss = recon_lossc.Unwarploss()

    epoch_start = 0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['model_state'])
            # optimizer.load_state_dict(checkpoint['optimizer_state'])
            print("Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            epoch_start = checkpoint['epoch']
        else:
            print("No checkpoint found at '{}'".format(args.resume))

    # Log file:
    if not os.path.exists(args.logdir):
        os.makedirs(args.logdir)
    # network_activation(t=[-1,1])_dataset_lossparams_augmentations_trainstart
    experiment_name = 'dnetccnl_htan_swat3dmini1kbm_l1_noaug_scratch'
    log_file_name = os.path.join(args.logdir, experiment_name + '.txt')
    if os.path.isfile(log_file_name):
        log_file = open(log_file_name, 'a')
    else:
        log_file = open(log_file_name, 'w+')

    log_file.write('\n---------------  ' + experiment_name +
                   '  ---------------\n')
    log_file.close()

    # Setup tensorboard for visualization
    if args.tboard:
        # save logs in runs/<experiment_name>
        writer = SummaryWriter(comment=experiment_name)

    # best_val_uwarpssim = 99999.0
    best_val_mse = 99999.0
    global_step = 0

    for epoch in range(epoch_start, args.n_epoch):
        avg_loss = 0.0
        avgl1loss = 0.0
        avgrloss = 0.0
        avgssimloss = 0.0
        train_mse = 0.0
        model.train()

        for i, (images, labels) in enumerate(trainloader):
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
            optimizer.zero_grad()
            target = model(images[:, 3:, :, :])
            target_nhwc = target.transpose(1, 2).transpose(2, 3)
            l1loss = loss_fn(target_nhwc, labels)
            rloss, ssim, uworg, uwpred = reconst_loss(images[:, :-1, :, :],
                                                      target_nhwc, labels)
            loss = (10.0 * l1loss) + (0.5 * rloss)  # + (0.3*ssim)
            # loss=l1loss
            avgl1loss += float(l1loss)
            avg_loss += float(loss)
            avgrloss += float(rloss)
            avgssimloss += float(ssim)

            train_mse += MSE(target_nhwc, labels).item()

            loss.backward()
            optimizer.step()
            global_step += 1

            if (i + 1) % 50 == 0:
                avg_loss = avg_loss / 50
                print("Epoch[%d/%d] Batch [%d/%d] Loss: %.4f" %
                      (epoch + 1, args.n_epoch, i + 1, len(trainloader),
                       avg_loss))
                avg_loss = 0.0

            if args.tboard and (i + 1) % 20 == 0:
                show_unwarp_tnsboard(global_step, writer, uwpred, uworg, 8,
                                     'Train GT unwarp', 'Train Pred Unwarp')
                writer.add_scalar('BM: L1 Loss/train', avgl1loss / (i + 1),
                                  global_step)
                writer.add_scalar('CB: Recon Loss/train', avgrloss / (i + 1),
                                  global_step)
                writer.add_scalar('CB: SSIM Loss/train', avgssimloss / (i + 1),
                                  global_step)

        avgssimloss = avgssimloss / len(trainloader)
        avgrloss = avgrloss / len(trainloader)
        avgl1loss = avgl1loss / len(trainloader)
        train_mse = train_mse / len(trainloader)
        print("Training L1:%4f" % (avgl1loss))
        print("Training MSE:'{}'".format(train_mse))
        train_losses = [avgl1loss, train_mse, avgrloss, avgssimloss]
        lrate = get_lr(optimizer)
        write_log_file(log_file_name, train_losses, epoch + 1, lrate, 'Train')

        model.eval()
        # val_loss = 0.0
        val_l1loss = 0.0
        val_mse = 0.0
        val_rloss = 0.0
        val_ssimloss = 0.0

        for i_val, (images_val, labels_val) in tqdm(enumerate(valloader)):
            with torch.no_grad():
                images_val = Variable(images_val.cuda())
                labels_val = Variable(labels_val.cuda())
                target = model(images_val[:, 3:, :, :])
                target_nhwc = target.transpose(1, 2).transpose(2, 3)
                pred = target_nhwc.data.cpu()
                gt = labels_val.cpu()
                l1loss = loss_fn(target_nhwc, labels_val)
                rloss, ssim, uworg, uwpred = reconst_loss(
                    images_val[:, :-1, :, :], target_nhwc, labels_val)
                val_l1loss += float(l1loss.cpu())
                val_rloss += float(rloss.cpu())
                val_ssimloss += float(ssim.cpu())
                val_mse += float(MSE(pred, gt))
            if args.tboard:
                show_unwarp_tnsboard(epoch + 1, writer, uwpred, uworg, 8,
                                     'Val GT unwarp', 'Val Pred Unwarp')

        val_l1loss = val_l1loss / len(valloader)
        val_mse = val_mse / len(valloader)
        val_ssimloss = val_ssimloss / len(valloader)
        val_rloss = val_rloss / len(valloader)
        print("val loss at epoch {}:: {}".format(epoch + 1, val_l1loss))
        print("val mse: {}".format(val_mse))
        val_losses = [val_l1loss, val_mse, val_rloss, val_ssimloss]
        write_log_file(log_file_name, val_losses, epoch + 1, lrate, 'Val')
        if args.tboard:
            # log the val losses
            writer.add_scalar('BM: L1 Loss/val', val_l1loss, epoch + 1)
            writer.add_scalar('CB: Recon Loss/val', val_rloss, epoch + 1)
            writer.add_scalar('CB: SSIM Loss/val', val_ssimloss, epoch + 1)

        # reduce learning rate
        sched.step(val_mse)

        if val_mse < best_val_mse:
            best_val_mse = val_mse
            state = {
                'epoch': epoch + 1,
                'model_state': model.state_dict(),
                'optimizer_state': optimizer.state_dict(),
            }
            torch.save(
                state, args.logdir + "{}_{}_{}_{}_{}_best_model.pkl".format(
                    args.arch, epoch + 1, val_mse, train_mse, experiment_name))

        if (epoch + 1) % 10 == 0:
            state = {
                'epoch': epoch + 1,
                'model_state': model.state_dict(),
                'optimizer_state': optimizer.state_dict(),
            }
            torch.save(
                state, args.logdir + "{}_{}_{}_{}_{}_model.pkl".format(
                    args.arch, epoch + 1, val_mse, train_mse, experiment_name))
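
One detail worth noting in the example above: the NCHW-to-NHWC conversion done with two transpose calls is equivalent to a single permute, which some readers may find clearer. A small self-contained check:

import torch

x = torch.randn(2, 3, 8, 8)                 # N, C, H, W
nhwc_a = x.transpose(1, 2).transpose(2, 3)  # as in the example above
nhwc_b = x.permute(0, 2, 3, 1)              # single-call equivalent
assert torch.equal(nhwc_a, nhwc_b)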
Example #25
0
def test(cfg, logger, vis):
    torch.cuda.manual_seed_all(66)
    torch.manual_seed(66)

    # Setup model, optimizer and loss function
    model_cls = get_model(cfg['model'])
    model = model_cls(cfg).to(device)

    optimizer_cls = get_optimizer(cfg)
    optimizer_params = {
        k: v
        for k, v in cfg["optimizer"].items() if k != "name"
    }
    optimizer = optimizer_cls(model.parameters(), **optimizer_params)

    crit = get_critical(cfg['critical'])().to(device)
    ssim = SSIM().to(device)

    model.eval()
    _, step = load_checkpoints(model,
                               optimizer,
                               cfg['checkpoint_dir'],
                               name='latest')

    # Setup Dataloader
    data_loader = get_loader(cfg["data"]["dataset"])
    data_path = cfg["data"]["path"]

    test_loader = data_loader(data_path,
                              split=cfg["data"]["test_split"],
                              patch_size=cfg['data']['patch_size'],
                              augmentation=cfg['data']['aug_data'])

    testloader = DataLoader(
        test_loader,
        batch_size=cfg["batch_size"],
        num_workers=cfg["n_workers"],
        shuffle=True,
    )

    all_num = 0
    all_losses = {}
    for i, batch in enumerate(testloader):

        O, B = batch
        O, B = Variable(O.to(device),
                        requires_grad=False), Variable(B.to(device),
                                                       requires_grad=False)
        R = O - B

        with torch.no_grad():
            O_Rs = model(O)
        loss_list = [crit(O_R, R) for O_R in O_Rs]
        ssim_list = [ssim(O - O_R, O - R) for O_R in O_Rs]

        losses = {
            'loss%d' % i: loss.item()
            for i, loss in enumerate(loss_list)
        }
        ssimes = {
            'ssim%d' % i: ssim.item()
            for i, ssim in enumerate(ssim_list)
        }
        losses.update(ssimes)

        prediction = O - O_Rs[-1]

        batch_size = O.size(0)

        all_num += batch_size
        for key, val in losses.items():
            if i == 0:
                all_losses[key] = 0.
            all_losses[key] += val * batch_size
            logger.info('batch %d loss %s: %f' % (i, key, val))

        if vis is not None:
            for k, v in losses.items():
                vis.plot(k, v)
            vis.images(np.clip((prediction.detach().data * 255).cpu().numpy(),
                               0, 255),
                       win='pred')
            vis.images(O.data.cpu().numpy(), win='input')
            vis.images(B.data.cpu().numpy(), win='groundtruth')

        if i % 20 == 0:
            save_image(name='test',
                       img_lists=[O.cpu(), prediction.cpu(),
                                  B.cpu()],
                       path=cfg['show_dir'],
                       step=i,
                       batch_size=cfg['batch_size'])

    for key, val in all_losses.items():
        logger.info('total loss %s: %f' % (key, val / all_num))
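
The loss aggregation above initializes all_losses only on the first batch and accumulates a batch-size-weighted sum; a collections.defaultdict expresses the same running weighted mean with less bookkeeping. A small sketch with dummy data (the variable names mirror the example, but this is not the original code):

from collections import defaultdict

# Dummy per-batch results standing in for the (batch_size, losses) pairs
# produced by the test loop above.
batches = [(4, {'loss0': 0.50, 'ssim0': 0.90}),
           (2, {'loss0': 0.20, 'ssim0': 0.95})]

all_losses = defaultdict(float)
all_num = 0
for batch_size, losses in batches:
    all_num += batch_size
    for key, val in losses.items():
        all_losses[key] += val * batch_size

mean_losses = {key: val / all_num for key, val in all_losses.items()}
print(mean_losses)  # sample-weighted means, e.g. loss0 = 0.4, ssim0 ~ 0.917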
Example #26
0
    def __init__(self, config):
        self.config = config
        self.ckpt_dir = config.ckpt_dir
        if not os.path.exists(self.ckpt_dir):
            os.makedirs(self.ckpt_dir)

        self.save_config(config)
        self.timer = Timer()

        self.writer = SummaryWriter(log_dir=config.ckpt_dir)

        self.lr = config.lr
        self.lr_decay_start = config.lr_decay_start
        self.datasets, self.loaders = get_loader(config)
        self.max_iters = config.max_iters
        if self.max_iters is not None:
            self.epochs = self.max_iters // len(self.loaders['train'])
        else:
            self.epochs = config.epochs
        self.start_epoch = 0

        ### Network ###
        self.netG = GModelSelector[config.G_model].ResnetGenerator(
            input_nc=config.in_channels,
            output_nc=config.out_channels,
            use_dropout=config.use_dropout,
            **config.model_params[config.G_model])
        self.netD = DModelSelector[config.D_model].NLayerDiscriminator(
            input_nc=6, n_layers=3)

        self.criterion_GAN = nn.MSELoss()
        self.criterion_L1 = nn.L1Loss()

        if config.distributed:
            self.netG = nn.DataParallel(self.netG)
            self.netD = nn.DataParallel(self.netD)
            patch_replication_callback(self.netG)
            patch_replication_callback(self.netD)

        if self.config.cuda:
            self.netG = self.netG.cuda()
            self.netD = self.netD.cuda()
            self.criterion_GAN = self.criterion_GAN.cuda()
            self.criterion_L1 = self.criterion_L1.cuda()

        # self.criterion = LossSelector[config.loss](**config.loss_params[config.loss])
        self.optimizer_G = optim.Adam(self.netG.parameters(),
                                      lr=self.lr,
                                      betas=(0.9, 0.999),
                                      eps=1e-8)
        self.optimizer_D = optim.Adam(self.netD.parameters(),
                                      lr=self.lr,
                                      betas=(0.9, 0.999),
                                      eps=1e-8)
        if self.max_iters is not None:
            self.lr_decay_G = optim.lr_scheduler.CosineAnnealingLR(
                self.optimizer_G, self.max_iters)
            self.lr_decay_D = optim.lr_scheduler.CosineAnnealingLR(
                self.optimizer_D, self.max_iters)
        elif self.epochs is not None:
            self.lr_decay_G = optim.lr_scheduler.LambdaLR(
                self.optimizer_G, lr_lambda=self.lambda_rule)
            self.lr_decay_D = optim.lr_scheduler.LambdaLR(
                self.optimizer_D, lr_lambda=self.lambda_rule)
        else:
            raise NotImplementedError(
                'either max_iters or epochs must be set; '
                'got max_iters={}, epochs={}'.format(self.max_iters, self.epochs))

        self.best_loss = float('inf')

        if config.resume:
            logger.info('***Resume from checkpoint***')
            state = torch.load(os.path.join(self.ckpt_dir, 'netG.pt'))
            self.netG.load_state_dict(state['net'])
            self.start_epoch = state['epoch']
            self.best_loss = state['best_loss']
            self.optimizer_G.load_state_dict(state['optim'])
            self.lr_decay_G.load_state_dict(state['lr_decay_G'])
            self.lr_decay_G.last_epoch = self.start_epoch

            state = torch.load(os.path.join(self.ckpt_dir, 'netD.pt'))
            self.netD.load_state_dict(state['net'])
            self.start_epoch = state['epoch']
            self.best_loss = state['best_loss']
            self.optimizer_D.load_state_dict(state['optim'])
            self.lr_decay_D.load_state_dict(state['lr_decay_D'])
            self.lr_decay_D.last_epoch = self.start_epoch
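
The LambdaLR schedulers above refer to self.lambda_rule, which is not part of this fragment. A common choice, and only an assumption here, is the pix2pix-style rule: keep the learning rate constant until lr_decay_start, then decay it linearly to zero over the remaining epochs.

def lambda_rule(epoch, lr_decay_start, total_epochs):
    # Multiplicative factor applied to the base lr by LambdaLR.
    decay_span = max(total_epochs - lr_decay_start, 1)
    return 1.0 - max(0, epoch - lr_decay_start) / float(decay_span)

# As a method of the trainer it would read self.lr_decay_start and self.epochs
# instead of taking them as arguments.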
Example #27
0
                                 columns=[i for i in label])
            #fig = plt.figure(figsize = (10,7))
            fig = sn.heatmap(df_cm, annot=True, cmap="BuPu", fmt='g')
            fig = fig.get_figure()
            fig.savefig(confusion_matrix, dpi=400)
        return


if __name__ == "__main__":
    main()

if __name__ == "__main__  not use":
    ckpt_name = "affectnet7_mobilenet_small_floss_alpha2.pth.tar"
    print("Evaluating", ckpt_name)
    _, valLoader = datasets.get_loader(setname="affectnet7",
                                       batch_size=8,
                                       use_sampler=False,
                                       num_workers=4)
    ckpt = torch.load("ckpt/" + ckpt_name, map_location='cpu')
    model = MobileNetV3_Small()
    model.load_state_dict(ckpt["state_dict"])
    #rafdb_table = {1:"Surprise", 2:"Fear", 3:"Disgust", 4:"Happiness", 5:"Sadness", 6:"Anger", 7:"Neutral"}
    affectnet7_table = {
        0: "Neutral",
        1: "Happy",
        2: "Sad",
        3: "Surprise",
        4: "Fear",
        5: "Disgust",
        6: "Anger"
    }
    eval(model, valLoader, affectnet7_table)
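
The fragment above begins inside a confusion-matrix plot, so most of the evaluation loop is missing. A hedged reconstruction of what such an eval routine typically looks like follows; the function name, the prediction loop, and the output path are all assumptions rather than the original code.

import numpy as np
import pandas as pd
import seaborn as sn
import torch
from sklearn.metrics import confusion_matrix


def eval_model(model, loader, label_table, out_path='confusion_matrix.png'):
    # Collect predictions over the loader, then plot a labelled confusion matrix.
    model.eval()
    preds, gts = [], []
    with torch.no_grad():
        for images, labels in loader:
            preds.append(model(images).argmax(dim=1).cpu().numpy())
            gts.append(labels.cpu().numpy())
    cm = confusion_matrix(np.concatenate(gts), np.concatenate(preds))
    names = [label_table[i] for i in sorted(label_table)]
    df_cm = pd.DataFrame(cm, index=names, columns=names)
    fig = sn.heatmap(df_cm, annot=True, cmap="BuPu", fmt='g').get_figure()
    fig.savefig(out_path, dpi=400)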
Example #28
0
def train_unbalanced_descent(D, dataQ0, dataP, wP, opt):
    n_samples, n_features = dataQ0.shape
    device = dataQ0.device

    # Lagrange multiplier for Augmented Lagrangian
    lambda_aug = torch.tensor([opt.lambda_aug_init],
                              requires_grad=True,
                              device=device)

    # MMD distance
    mmd = MMD_RFF(num_features=n_features, num_outputs=300).to(device)

    # Train
    print('Start training')

    if opt.plot_online:
        fig, ax = plt.subplots()
        ax.set_xlim((-1.1, 1.1))
        ax.set_ylim((-1.1, 1.1))
        scat = ax.scatter([], [], facecolor='r')

    # Save stuff
    collQ, coll_mmd = [], []
    birth_total, death_total = 0, 0

    dataQ = dataQ0.clone()
    for t in range(opt.T + 1):
        tic = time.time()

        # Snapshot of current state
        with torch.no_grad():
            mmd_PQ = mmd(dataP, dataQ, weights_X=wP)
        coll_mmd.append(mmd_PQ)
        collQ.append(dataQ.detach().cpu().numpy())

        # (1) Update D network
        optimizerD = torch.optim.Adam(D.parameters(),
                                      lr=opt.lrD,
                                      weight_decay=opt.wdecay,
                                      amsgrad=True)
        D.train()
        for i in range(opt.n_c_startup if t == 0 else opt.n_c):
            optimizerD.zero_grad()

            x_p, w_p = minibatch((dataP, wP), opt.batchSizeD)
            x_q = minibatch(dataQ, opt.batchSizeD).requires_grad_(True)

            loss, Ep_f, Eq_f, normgrad_f2_q = D_forward_weights(
                D, x_p, w_p, x_q, 1.0, lambda_aug, opt.alpha, opt.rho)
            loss.backward()
            optimizerD.step()

            manual_sgd_(lambda_aug, opt.rho)

        tocD = time.time() - tic

        # (2) Update Q distribution (with birth/death)
        D.eval()

        # compute initial m_f
        with torch.no_grad():
            x_q = minibatch(dataQ)
            m_f = D(x_q).mean()

        # Update particles positions, and compute birth-death scores
        new_x_q, b_j = [], []
        for x_q, in get_loader(dataQ, batch_size=opt.batchSizeQ):
            x_q = x_q.detach().requires_grad_(True)
            sum_f_q = D(x_q).sum()
            grad_x_q = grad(outputs=sum_f_q, inputs=x_q, create_graph=True)[0]

            with torch.no_grad():
                new_x_q.append(x_q + opt.lrQ * grad_x_q)
                f_q_new = D(new_x_q[-1])

                # birth-death score
                m_f = m_f + (1 / n_samples) * (f_q_new.sum() - sum_f_q)

                b_j.append(f_q_new.view(-1) - m_f)

        new_x_q = torch.cat(new_x_q)
        b_j = torch.cat(b_j)

        # Birth
        idx_alive = (b_j > 0).nonzero().view(-1)
        p_j = 1 - torch.exp(-opt.alpha * opt.tau * b_j[idx_alive])
        idx_birth = idx_alive[p_j > torch.rand_like(p_j)]

        # Death
        idx_neg = (b_j <= 0).nonzero().view(-1)
        p_j = 1 - torch.exp(-opt.alpha * opt.tau * torch.abs(b_j[idx_neg]))
        ix_die = p_j > torch.rand_like(p_j)  # Particles that die
        idx_dead = idx_neg[ix_die]
        idx_notdead = idx_neg[~ix_die]  # Particles that don't die

        birth_total += len(idx_birth)
        death_total += len(idx_dead)

        if not opt.keep_order:
            new_x_q.data = new_x_q.data[torch.cat(
                (idx_alive, idx_notdead, idx_birth))]

            # Resize population
            if opt.balance:
                n_l = new_x_q.shape[0]

                if n_l < n_samples:  # Randomly double particles
                    r_idx = torch.randint(n_l, (n_samples - n_l, ))
                    new_x_q = torch.cat((new_x_q, new_x_q[r_idx]))

                if n_l > n_samples:  # Randomly kill particles
                    r_idx = torch.randperm(
                        n_l)[:n_samples]  # Particles that should be kept
                    new_x_q = new_x_q[r_idx]

        else:
            # Sample dead samples from cloned ones (if there are any), otherwise sample them from alive
            if len(idx_birth) > 0:
                r_idx = idx_birth[torch.randint(len(idx_birth),
                                                (len(idx_dead), ))]
            else:
                r_idx = idx_alive[torch.randint(len(idx_alive),
                                                (len(idx_dead), ))]
            new_x_q.data[idx_dead] = new_x_q.data[r_idx]

        dataQ = new_x_q.data

        # (3) print some stuff
        if t % opt.log_every == 0:
            x_p, w_p = minibatch((dataP, wP))
            x_q = minibatch(dataQ)
            loss, Ep_f, Eq_f, normgrad_f2_q = D_forward_weights(
                D, x_p, w_p, x_q, 1.0, lambda_aug, opt.alpha, opt.rho)
            with torch.no_grad():
                SobDist_lasti = Ep_f.item() - Eq_f.item()
                mmd_dist = mmd(dataP, dataQ, weights_X=wP)

            print('[{:5d}/{}] SobolevDist={:.4f}\t mmd={:.5f} births={} deaths={} Eq_normgrad_f2[stepQ]={:.3f} Ep_f={:.2f} Eq_f={:.2f} lambda_aug={:.4f}'.\
                format(t, opt.T, SobDist_lasti, mmd_dist, birth_total, death_total, normgrad_f2_q.mean().item(), Ep_f.item(), Eq_f.item(), lambda_aug.item()))

            if opt.plot_online:
                scat.set_offsets(dataQ[:, :2].detach().cpu().numpy())
                plt.pause(0.01)

    return dataQ, collQ, coll_mmd
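
train_unbalanced_descent above depends on minibatch and manual_sgd_, neither of which is included in this excerpt. The definitions below are inferred from the call sites and should be read as assumptions, not the original helpers.

import torch


def minibatch(data, batch_size=None):
    # Draw a random minibatch; `data` is a tensor or a tuple of tensors whose
    # entries may be None (e.g. (dataP, wP) with wP possibly None).
    tensors = data if isinstance(data, tuple) else (data,)
    n = tensors[0].shape[0]
    if batch_size is None:
        idx = torch.arange(n)
    else:
        idx = torch.randperm(n)[:batch_size]
    out = tuple(t[idx] if t is not None else None for t in tensors)
    return out if isinstance(data, tuple) else out[0]


def manual_sgd_(param, lr):
    # In-place gradient step on a single tensor whose .grad was just populated
    # by loss.backward(); the sign convention here is an assumption.
    with torch.no_grad():
        if param.grad is not None:
            param -= lr * param.grad
            param.grad.zero_()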
Example #29
0
if __name__ == '__main__':
    # Set up random seed
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.backends.cudnn.deterministic = True
        # Setting benchmark to False might slow down the training speed
        torch.backends.cudnn.benchmark = True  # False

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    start_epoch = 0

    # Set up data
    print("Loading data")
    train_dataloader, val_dataloader = get_loader(args)

    # Set up model and loss function
    print("Creating model")
    model = get_model(args)
    #     device_count = torch.cuda.device_count()
    #     if device_count > 1:
    #         model = nn.DataParallel(model)
    model = model.to(device)
    # model.load_state_dict(torch.load('../test_v2/2020-05-09-05-41-04/epoch21.pth')['state_dict'])

    if args.resume_dir and not args.debug:
        # Load checkpoint
        print('==> Resuming from checkpoint')
        checkpoint = torch.load(os.path.join(args.resume_dir, 'ckpt.pth'))
        model.load_state_dict(checkpoint['state_dict'])
Example #30
0
def visEmbed(exp_dict):
    # src_loader = datasets.get_loader(exp_dict["src_dataset"], "train",
    # batch_size=exp_dict["src_batch_size"])

    # tgt_val_loader =  datasets.get_loader(exp_dict["tgt_dataset"], "val",
    # batch_size=exp_dict["tgt_batch_size"])

    src_loader = datasets.get_loader(exp_dict["src_dataset"],
                                     "train",
                                     batch_size=exp_dict["src_batch_size"],
                                     exp_dict=exp_dict)

    tgt_val_loader = datasets.get_loader(exp_dict["tgt_dataset"],
                                         "val",
                                         batch_size=exp_dict["tgt_batch_size"],
                                         exp_dict=exp_dict)

    # src_model, src_opt = models.get_model(exp_dict["src_model"],
    # exp_dict["n_outputs"])

    src_model, src_opt, src_scheduler = models.get_model(
        exp_dict["src_model"],
        exp_dict["n_outputs"],
        input_channels=exp_dict['input_channels'],
        patch_size=exp_dict['patch_size'],
        n_classes=exp_dict['n_classes'])
    src_model.load_state_dict(
        torch.load(exp_dict["path"] +
                   "/model_src_run{}.pth".format(exp_dict['run'])))

    # tgt_model, tgt_opt = models.get_model(exp_dict["tgt_model"],
    # exp_dict["n_outputs"])

    tgt_model, tgt_opt, tgt_scheduler = models.get_model(
        exp_dict["tgt_model"],
        exp_dict["n_outputs"],
        input_channels=exp_dict['input_channels'],
        patch_size=exp_dict['patch_size'],
        n_classes=exp_dict['n_classes'])
    tgt_model.load_state_dict(
        torch.load(exp_dict["path"] +
                   "/model_tgt_run{}.pth".format(exp_dict['run'])))

    X, X_tgt = losses.extract_embeddings(src_model, src_loader)

    Y, Y_tgt = losses.extract_embeddings(tgt_model, tgt_val_loader)

    X, X_tgt = X[:500], X_tgt[:500]
    Y, Y_tgt = Y[:500], Y_tgt[:500]
    # X, X_tgt = X[:1080], X_tgt[:1080]
    # Y,Y_tgt = Y[:1080], Y_tgt[:1080]

    n_classes = tgt_model.n_classes
    src_kmeans = KMeans(n_clusters=n_classes)
    src_kmeans.fit(X)
    Xc = src_kmeans.cluster_centers_

    clf = neighbors.KNeighborsClassifier(n_neighbors=2)
    clf.fit(X, X_tgt)
    Xc_tgt = clf.predict(Xc)

    # acc_tgt = test.validate(src_model, tgt_model,
    #                                 src_loader,
    #                                 tgt_val_loader)

    tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
    #tsne.fit(Y[:500])
    S_tsne = tsne.fit_transform(np.vstack([Y, X, Xc]))
    #X_tsne = tsne.transform(X[:500])
    Y_tsne = S_tsne[:Y.shape[0]]
    X_tsne = S_tsne[Y.shape[0]:-n_classes]
    Xc_tsne = S_tsne[-n_classes:]
    # plt.mpl.rcParams['grid.color'] = 'k'
    # plt.mpl.rcParams['grid.linestyle'] = ':'
    # plt.mpl.rcParams['grid.linewidth'] = 0.5
    # Y_labels = Y_labels
    # X_labels = X_labels

    # scatter(Y_tsne, Y_tgt+1, win="1", title="target - {}".format(exp_dict["tgt_dataset"]))
    # scatter(X_tsne, X_tgt+1, win="2",title="source - {}".format(exp_dict["src_dataset"]))

    colors = [
        "b", "g", "r", "c", "m", "y", "gray", "w", "chocolate", "olive", "pink"
    ]

    if 1:
        fig = plt.figure(figsize=(6, 6))
        plt.grid(linestyle='dotted')
        plt.scatter(X_tsne[:, 0], X_tsne[:, 1], alpha=0.6, edgecolors="black")

        for c in range(n_classes):
            ind = Xc_tgt == c
            color = colors[c + 1]
            plt.scatter(Xc_tsne[ind][:, 0],
                        Xc_tsne[ind][:, 1],
                        s=250,
                        c=color,
                        edgecolors="black",
                        marker="*")
        # plt.axes().set_aspect('equal', 'datalim')
        plt.xlabel("t-SNE Feature 2")
        plt.ylabel("t-SNE Feature 1")
        title = "Source Dataset ({}) - Center: {} - Adv: {}".format(
            exp_dict["src_dataset"].upper().replace("BIG", ""),
            exp_dict["options"]["center"], exp_dict["options"]["disc"])
        plt.title(title)
        fig.tight_layout(rect=[0, 0.03, 1, 0.95])
        plt.savefig("figures/src_{}_center_{}_disc_{}.pdf".format(
            exp_dict["exp_name"].replace(" ", ""),
            exp_dict["options"]["center"], exp_dict["options"]["disc"]),
                    bbox_inches='tight',
                    transparent=False)

        plt.savefig("figures/src_{}_center_{}_disc_{}.png".format(
            exp_dict["exp_name"], exp_dict["options"]["center"],
            exp_dict["options"]["disc"]),
                    bbox_inches='tight',
                    transparent=False)
        # ms.visplot(fig)

    if 1:

        fig = plt.figure(figsize=(6, 6))
        plt.grid(linestyle='dotted')
        for c in range(n_classes):
            ind = Y_tgt == c
            color = colors[c + 1]

            plt.scatter(Y_tsne[ind][:, 0],
                        Y_tsne[ind][:, 1],
                        alpha=0.6,
                        c=color,
                        edgecolors="black")

        for c in range(n_classes):
            ind = Xc_tgt == c
            color = colors[c + 1]
            plt.scatter(Xc_tsne[ind][:, 0],
                        Xc_tsne[ind][:, 1],
                        s=350,
                        c=color,
                        edgecolors="black",
                        marker="*")
        # plt.axes().set_aspect('equal', 'datalim')
        plt.xlabel("t-SNE Feature 2")
        plt.ylabel("t-SNE Feature 1")
        title = "Target Dataset ({}) - Center: {} - Adv: {}".format(
            exp_dict["tgt_dataset"].upper().replace("BIG", ""),
            exp_dict["options"]["center"], exp_dict["options"]["disc"])
        plt.title(title)
        fig.tight_layout(rect=[0, 0.03, 1, 0.95])
        plt.savefig("figures/tgt_{}_center_{}_disc_{}.pdf".format(
            exp_dict["exp_name"], exp_dict["options"]["center"],
            exp_dict["options"]["disc"]),
                    bbox_inches='tight',
                    transparent=False)

        plt.savefig("figures/tgt_{}_center_{}_disc_{}.png".format(
            exp_dict["exp_name"], exp_dict["options"]["center"],
            exp_dict["options"]["disc"]),
                    bbox_inches='tight',
                    transparent=False)
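
visEmbed relies on losses.extract_embeddings, which is not shown here. A minimal sketch of the usual pattern, assuming the loader yields (images, labels) batches and that model(images) returns the embedding used for t-SNE; the real helper may differ.

import numpy as np
import torch


def extract_embeddings(model, loader):
    # Run the encoder over the loader and stack embeddings with their labels.
    model.eval()
    feats, labels = [], []
    with torch.no_grad():
        for images, targets in loader:
            feats.append(model(images).cpu().numpy())
            labels.append(targets.cpu().numpy())
    return np.concatenate(feats), np.concatenate(labels)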