Example #1
def main():

    INPUT_DIM = 20
    OUTPUT_DIM = 2
    BATCH_SIZE = 10
    EPOCH = 60

    criterion = nn.CrossEntropyLoss(reduction='sum').to(DEVICE)
    channelFilter = ChannelFilter(INPUT_DIM, OUTPUT_DIM).to(DEVICE)
    optim = Adam(channelFilter.parameters(), lr=1e-3, betas=(0.5, 0.99))

    dataloader = get_dataloader(dataset_path='S1',
                                train=True,
                                batch_size=BATCH_SIZE,
                                full=True)
    valid_loader = get_dataloader(dataset_path='S1',
                                  train=False,
                                  batch_size=5000,
                                  shuffle=False)

    for i in range(EPOCH):
        # training
        total_loss = 0
        for index, (data, label) in enumerate(dataloader):

            channelFilter.train()
            data, label = data.to(DEVICE), label.to(DEVICE)
            logit = channelFilter(data)

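            # with reduction='sum', dividing by the batch size gives the mean per-sample loss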
            loss = criterion(logit, label) / BATCH_SIZE
            optim.zero_grad()
            loss.backward()
            optim.step()

            total_loss += loss.item()

        # valid
        channelFilter.eval()
        with torch.no_grad():
            valid_data, valid_labels = valid_loader.dataset.data, valid_loader.dataset.label
            valid_data = torch.tensor(valid_data,
                                      dtype=torch.float32).to(DEVICE)
            valid_labels = torch.tensor(valid_labels).to(DEVICE)
            t_logit = channelFilter(valid_data)
            t_loss = criterion(t_logit, valid_labels).item() / len(valid_labels)  # mean per-sample loss, comparable to train_loss
            valid_pred = torch.argmax(t_logit, dim=1).data.cpu().numpy()

            valid_labels = valid_labels.data.cpu().numpy()
            acc, p, r, f1 = accuracy_score(valid_labels, valid_pred), precision_score(valid_labels, valid_pred), \
                            recall_score(valid_labels, valid_pred), f1_score(valid_labels, valid_pred)
            print(
                'iter: {}, train_loss: {:.3f}, valid_loss: {:.3f}, acc: {:.3f}, p: {:.3f}, r: {:.3f}, f1: {:.3f}'
                .format(i, total_loss / len(dataloader), t_loss, acc, p, r,
                        f1))
    torch.save(channelFilter.state_dict(), './channelFilter.pk')
Example #2
    def __init__(self, config):
        self.config = config
        self.mode = self.config.mode
        self.model = get_model(self.config)
        self.model_name = self.config.model_name
        self.load_checkpoint()
        self.use_val = self.config.use_val
        self.lr = self.config.learning_rate
        self.epochs = self.config.epochs
        self.start_epoch = self.config.start_epoch
        self.batch_size = self.config.batch_size
        self.checkpoint_path = self.config.checkpoint_path
        self.pose_mode = self.config.is_load_pose

        if self.mode == 'train':
            if self.use_val:
                self.train_loader, self.val_loader = get_dataloader(
                    self.config)
                self.n_batch_train = len(self.train_loader)
                self.n_batch_val = len(self.val_loader)
            else:
                self.train_loader = get_dataloader(self.config)
                self.n_batch_train = len(self.train_loader)
        elif self.mode == 'test':
            self.test_loader = get_dataloader(self.config)
            self.n_batch_test = len(self.test_loader)

        self.criterion = torch.nn.L1Loss()
        # self.criterion = CosineLoss()
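        # resolve the optimizer class by name from torch.optim, e.g. 'Adam' -> torch.optim.Adam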
        optimizer_cls = getattr(importlib.import_module('torch.optim'),
                                self.config.optimizer)
        self.optimizer = optimizer_cls(self.model.parameters(), lr=self.lr)
        # self.scheduler = Lookahead(self.optimizer, k=5, alpha=0.5)
        self.scheduler = StepLR(self.optimizer, step_size=self.config.step_size)  # step size assumed to come from the config

        self.best_ae = float('inf')
        self.best_loss = float('inf')

        if self.config.wandb:
            print("Using wandb to log results")
            name = self.config.prefix + "_" + self.model_name + "_" + self.config.solver
            wandb.init(project='gaze-estimation',
                       entity=self.config.wandb_entity,
                       name=name)
            wandb.config.update(config)

        # use accelerator to run on different devices easily
        self.model, self.optimizer, self.train_loader = accelerator.prepare(
            self.model, self.optimizer, self.train_loader)
Example #3
    def __init__(self, args, **kwargs):
        super(DecTrainer, self).__init__(args, **kwargs)

        # dataloader
        self.trainloader = get_dataloader(args, cfg, 'train')
        # self.trainloader_val = get_dataloader(args, cfg, 'train_voc')
        self.valloader = get_dataloader(args, cfg, 'val')
        self.denorm = self.trainloader.dataset.denorm
        self.use_triplet = args.use_triplet
        self.loss_3d = args.loss_3d
        self.normalize_feature = args.normalize_feature

        self.nclass = get_num_classes(args)
        self.classNames = get_class_names(args)
        assert self.nclass == len(self.classNames) - 1

        self.classIndex = {}
        for i, cname in enumerate(self.classNames):
            self.classIndex[cname] = i

        # model
        self.enc = get_model(cfg.NET, num_classes=self.nclass)
        self.criterion_cls = get_criterion(cfg.NET.LOSS)

        # optimizer using different LR
        enc_params = self.enc.parameter_groups(cfg.NET.LR, cfg.NET.WEIGHT_DECAY)
        self.optim_enc = self.get_optim(enc_params, cfg.NET)

        # checkpoint management
        self._define_checkpoint('enc', self.enc, self.optim_enc)
        self._load_checkpoint(args.resume)

        self.fixed_batch = None
        self.fixed_batch_path = args.fixed_batch_path
        if os.path.isfile(self.fixed_batch_path):
            print("Loading fixed batch from {}".format(self.fixed_batch_path))
            self.fixed_batch = torch.load(self.fixed_batch_path)

        # using cuda
        if cfg.NUM_GPUS != 0:
            self.enc = nn.DataParallel(self.enc)
            self.criterion_cls = nn.DataParallel(self.criterion_cls)
            self.enc = self.enc.cuda()
            self.criterion_cls = self.criterion_cls.cuda()

        # CHANGE: visual
        self.visual_times = 0
        self.dataset = args.dataset.lower()
Example #4
def run(config, num_checkpoint, epoch_end, output_filename):
    task = get_task(config)
    preprocess_opt = task.get_preprocess_opt()
    dataloader = get_dataloader(config, 'train',
                                get_transform(config, 'dev', **preprocess_opt))

    model = task.get_model()
    checkpoints = get_checkpoints(config, num_checkpoint, epoch_end)
    print('checkpoints:')
    print('\n'.join(checkpoints))

    last_epoch, _ = utils.checkpoint.load_checkpoint(model, None, checkpoints[0])  # capture the epoch so output naming works with a single checkpoint
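    # average the remaining checkpoints into the first with equal weights: after k models each has weight 1/k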
    for i, checkpoint in enumerate(checkpoints[1:]):
        model2 = get_task(config).get_model()
        last_epoch, _ = utils.checkpoint.load_checkpoint(
            model2, None, checkpoint)
        swa.moving_average(model, model2, 1. / (i + 2))

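    # recompute BatchNorm running statistics with the averaged weights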
    with torch.no_grad():
        swa.bn_update(dataloader, model)

    output_name = '{}.{}.{:03d}'.format(output_filename, num_checkpoint,
                                        last_epoch)
    print('save {}'.format(output_name))
    utils.checkpoint.save_checkpoint(
        config,
        model,
        None,
        0,
        0,
        name=output_name,
        weights_dict={'state_dict': model.state_dict()})
Example #5
    def train(self, train_dataset, test_data, output_dir):
        tracker = LossTracker(output_dir)
        global_steps = 0
        for res_idx, res in enumerate(self.cfg['resolutions']):
            self.set_optimizers_lr(self.cfg['learning_rates'][res_idx])
            batchs_in_phase = self.cfg['phase_lengths'][res_idx] // self.cfg['batch_sizes'][res_idx]
            dataloader = EndlessDataloader(get_dataloader(train_dataset, self.cfg['batch_sizes'][res_idx], resize=res, device=self.device))
            progress_bar = tqdm(range(batchs_in_phase * 2))
            for i in progress_bar:
                alpha = min(1.0, i / batchs_in_phase)  # < 1 in the first half and 1 in the second
                progress_bar.set_description(f"gs-{global_steps}_res-{res_idx}={res}x{res}_alpha-{alpha:.3f}")
                batch_real_data = dataloader.next()

                # train discriminator
                self.D_optimizer.zero_grad()
                loss_d = self.get_D_loss(batch_real_data, res_idx, alpha)
                loss_d.backward()
                self.D_optimizer.step()
                tracker.update(dict(loss_d=loss_d))

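                # update the generator only once every n_critic discriminator steps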
                if (1+i) % self.cfg['n_critic'] == 0:
                    # train generator
                    self.G_optimizer.zero_grad()
                    loss_g = self.get_G_loss(batch_real_data, res_idx, alpha)
                    loss_g.backward()
                    self.G_optimizer.step()
                    tracker.update(dict(loss_g=loss_g))
                global_steps += 1
                if global_steps % self.cfg['dump_imgs_freq'] == 0:
                    self.save_sample(global_steps, tracker, test_data, output_dir, res_idx, alpha)
            self.save_train_state(os.path.join(output_dir, 'checkpoints', f"ckpt_res-{res_idx}={res}x{res}-end.pt"))
Example #6
    def __init__(self, args):
        self.args = args
        self.device = args.device
        self.start_iter = 1
        self.train_iters = args.train_iters
        # coeffs
        self.lambda_A = args.lambda_A
        self.lambda_B = args.lambda_B
        self.lambda_idt = args.lambda_idt

        self.dataloader_A, self.dataloader_B = get_dataloader(args)

        self.D_B, self.G_AB = get_model(args)
        self.D_A, self.G_BA = get_model(args)

        self.criterion_GAN = GANLoss(use_lsgan=args.use_lsgan).to(args.device)
        self.criterion_cycle = nn.L1Loss()
        self.criterion_idt = nn.L1Loss()

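        # one Adam instance updates both discriminators jointly; another updates both generators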
        self.optimizer_D = torch.optim.Adam(
            itertools.chain(self.D_B.parameters(), self.D_A.parameters()),
            lr=args.lr, betas=(args.beta1, args.beta2), weight_decay=args.weight_decay)
        self.optimizer_G = torch.optim.Adam(
            itertools.chain(self.G_AB.parameters(), self.G_BA.parameters()),
            lr=args.lr, betas=(args.beta1, args.beta2), weight_decay=args.weight_decay)

        self.logger = self.get_logger(args)
        self.writer = SummaryWriter(args.log_dir)

        save_args(args.log_dir, args)
Example #7
def run(config):
    train_dir = config.train.dir

    task = get_task(config)
    optimizer = get_optimizer(config, task.get_model().parameters())

    checkpoint = utils.checkpoint.get_initial_checkpoint(config)
    if checkpoint is not None:
        last_epoch, step = utils.checkpoint.load_checkpoint(
            task.get_model(), optimizer, checkpoint)
    else:
        last_epoch, step = -1, -1

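    # last_epoch == -1 means no checkpoint was found: training starts from scratch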
    print('from checkpoint: {} last epoch:{}'.format(checkpoint, last_epoch))
    scheduler = get_scheduler(config, optimizer, last_epoch)

    preprocess_opt = task.get_preprocess_opt()
    dataloaders = {
        split: get_dataloader(config, split,
                              get_transform(config, split, **preprocess_opt))
        for split in ['train', 'dev']
    }

    writer = SummaryWriter(config.train.dir)
    train(config, task, dataloaders, optimizer, scheduler, writer,
          last_epoch + 1)
Example #8
def run(config):
    train_dir = config.train.dir

    model = get_model(config)
    if torch.cuda.is_available():
        model = model.cuda()
    criterion = get_loss(config)
    optimizer = get_optimizer(config, model.parameters())

    checkpoint = utils.checkpoint.get_initial_checkpoint(config)
    if checkpoint is not None:
        last_epoch, step = utils.checkpoint.load_checkpoint(
            model, optimizer, checkpoint)
    else:
        last_epoch, step = -1, -1

    print('from checkpoint: {} last epoch:{}'.format(checkpoint, last_epoch))
    scheduler = get_scheduler(config, optimizer, last_epoch)

    dataloaders = {
        split: get_dataloader(config, split, get_transform(config, split))
        for split in ['train', 'val']
    }

    writer = SummaryWriter(config.train.dir)
    train(config, model, dataloaders, criterion, optimizer, scheduler, writer,
          last_epoch + 1)
Example #9
def inference(config, model, split, src_file, output_path=None):
    if split == 'test':
        data_path = '../data/dicom-images-test'
    else:
        data_path = '../data/dicom-images-train'

    dataset = CustomTestDataset(data_path, src_file, split, config[INPUT_SIZE])
    dataloader = get_dataloader(dataset, 1)

    model = model.cuda()
    model.eval()
    model = TTAWrapper(model, fliplr_image2mask)

    with torch.no_grad():
        total_step = len(dataloader)
        for i, (images, id) in tqdm.tqdm(enumerate(dataloader),
                                         total=total_step):

            images = torch.cat(images, dim=0)
            images = images.cuda()

            merged_out = model(images)
            mean_logits = torch.mean(merged_out, dim=0, keepdim=True)

            np.save(os.path.join(output_path, id[0] + '.npy'),
                    mean_logits.cpu().numpy())
Example #10
def run(config):
    model = get_model(config[MODEL_NAME], config[MODEL_PARAMS]).cuda()
    criterion = get_loss(config[LOSS_NAME], config[LOSS_PARAMS])
    optimizer = get_optimizer(config[OPTIM_NAME],
                              model.parameters(),
                              optimizer_params=config[OPTIM_PARAMS])

    last_epoch = -1
    scheduler = get_scheduler(config[SCHEDULER_NAME], optimizer, last_epoch,
                              config[SCHEDULER_PARAMS])

    datasets = {
        stage: CustomDataset(DATA_DIR, stage, config[FOLD_ID],
                             config[DATA_PREFIX], config[INPUT_SIZE])
        for stage in ['train', 'test']
    }

    dataloaders = {
        stage: get_dataloader(datasets[stage], config[BATCH_SIZE])
        for stage in ['train', 'test']
    }

    writer = SummaryWriter(config[TRAIN_DIR])
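    # note: clip_grad_value_ clips existing .grad tensors in place at call time; to take effect it must run inside the training loop between backward() and step()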
    clip_grad_value_(model.parameters(), 2.0)
    train(config, model, dataloaders, criterion, optimizer, scheduler, writer,
          last_epoch + 1)
Example #11
def main():
    args = arguments()

    num_templates = 25  # aka the number of clusters

    train_loader, weights_dir = get_dataloader(args.traindata, args, num_templates)

    model = DetectionModel(num_objects=1, num_templates=num_templates)
    loss_fn = DetectionCriterion(num_templates)

    optimizer = optim.SGD(model.learnable_parameters(args.lr), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)
    # optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    if args.resume:
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        # Set the start epoch if it has not been
        if not args.start_epoch:
            args.start_epoch = checkpoint['epoch']

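    # last_epoch=args.start_epoch - 1 resumes the LR schedule from the checkpoint's epoch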
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, last_epoch=args.start_epoch-1)

    # train and evaluate for `epochs`
    for epoch in range(args.start_epoch, args.epochs):
        scheduler.step()
        trainer.train(model, loss_fn, optimizer, train_loader, epoch, save_path=weights_dir)
Example #12
def run(config, folds_dir, balanced):
    model = get_model(config[MODEL_NAME], config[MODEL_PARAMS]).cuda()
    criterion = get_loss(config[LOSS_NAME], config[LOSS_PARAMS])
    optimizer = get_optimizer(config[OPTIM_NAME],
                              model.parameters(),
                              optimizer_params=config[OPTIM_PARAMS])

    last_epoch = -1
    scheduler = get_scheduler(config[SCHEDULER_NAME], optimizer, last_epoch,
                              config[SCHEDULER_PARAMS])

    datasets = {
        stage: CustomDataset(folds_dir, stage, config[FOLD_ID],
                             config[DATA_PREFIX], config[INPUT_SIZE])
        for stage in ['train', 'test']
    }

    print('Loading sampler')
    if balanced:
        train_sampler = BalancedBatchSampler(datasets['train'])
    else:
        train_sampler = None
    print('Sampler loaded')
    dataloaders = {
        stage: get_dataloader(datasets[stage], config[BATCH_SIZE],
                              train_sampler)
        for stage in ['train', 'test']
    }

    writer = SummaryWriter(config[TRAIN_DIR])
    train(config, model, dataloaders, criterion, optimizer, scheduler, writer,
          last_epoch + 1)
Example #13
def main():
    args = parse_args()

    # set random seed
    utils.seed_torch(args.seed)

    # Setup CUDA, GPU
    if not torch.cuda.is_available():
        print("CUDA is not available")
        exit(1)

    train_loader, valid_loader = datasets.get_dataloader(
        fold=args.fold,
        batch_size=args.batch_size,
        num_workers=args.num_workers)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()

    model = PandaNet(arch=args.arch)
    model.to("cuda")

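    # ArcFace-style margin head mapping 512-d embeddings to the 6 class logits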
    metric = ArcMarginProduct(in_features=512, out_features=6).to("cuda")

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 weight_decay=args.weight_decay)

    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 30, 60, 90], gamma=0.5)

    """ Train the model """
    from datetime import datetime
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')
    log_prefix = f'{current_time}_{args.arch}_fold_{args.fold}_{args.tile_size}_{args.num_tiles}'
    log_dir = os.path.join(configure.TRAINING_LOG_PATH,
                           log_prefix)

    tb_writer = None
    if args.log:
        tb_writer = SummaryWriter(log_dir=log_dir)

    best_score = 0.0
    model_path = os.path.join(configure.MODEL_PATH,
                              f'{args.arch}_fold_{args.fold}_{args.tile_size}_{args.num_tiles}.pth')

    print(f'training started: {current_time}')
    for epoch in range(args.epochs):
        train_loss = train(
            dataloader=train_loader,
            model=model,
            criterion=criterion,
            metric=metric,
            optimizer=optimizer)
        scheduler.step()

    current_time = datetime.now().strftime('%b%d_%H-%M-%S')
    print(f'training finished: {current_time}')
Example #14
def inference_single_tta(config, task, preprocess_opt, split, fold, flip,
                         align, ret_dict):
    config.transform.params.align = align
    transform = 'test' if split == 'test' else 'all'
    config.data.params.landmark_ver = fold
    dataloader = get_dataloader(
        config, split,
        get_transform(config, transform, flip=flip, **preprocess_opt))
    id_dict = inference(config, task, dataloader, ret_dict)
    return id_dict
Example #15
def main():
    args = arguments()

    num_templates = 25  # aka the number of clusters

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    img_transforms = transforms.Compose([transforms.ToTensor(), normalize])
    train_loader, _ = get_dataloader(args.traindata,
                                     args,
                                     num_templates,
                                     img_transforms=img_transforms)

    model = DetectionModel(num_objects=1, num_templates=num_templates)
    loss_fn = DetectionCriterion(num_templates)

    # directory where we'll store model weights
    weights_dir = "weights"
    if not osp.exists(weights_dir):
        os.mkdir(weights_dir)

    # check for CUDA
    if torch.cuda.is_available():
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')

    optimizer = optim.SGD(model.learnable_parameters(args.lr),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    # optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    if args.resume:
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        # Set the start epoch if it has not been
        if not args.start_epoch:
            args.start_epoch = checkpoint['epoch']

    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          step_size=20,
                                          last_epoch=args.start_epoch - 1)

    # train and evaluate for `epochs`
    for epoch in range(args.start_epoch, args.epochs):
        scheduler.step()
        trainer.train(model,
                      loss_fn,
                      optimizer,
                      train_loader,
                      epoch,
                      save_path=weights_dir,
                      device=device)
Example #16
def dataloader(args):
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    val_transforms = transforms.Compose([transforms.ToTensor(), normalize])

    val_loader, templates = get_dataloader(args.dataset,
                                           args,
                                           train=False,
                                           split=args.split,
                                           img_transforms=val_transforms)
    return val_loader, templates
Example #17
def search_once(config, policy):
    model = get_model(config).cuda()
    criterion = get_loss(config)
    optimizer = get_optimizer(config, model.parameters())
    scheduler = get_scheduler(config, optimizer, -1)

    transforms = {'train': get_transform(config, 'train', params={'policies': policy}),
                  'val': get_transform(config, 'val')}
    dataloaders = {split: get_dataloader(config, split, transforms[split])
                   for split in ['train', 'val']}

    score_dict = train(config, model, dataloaders, criterion, optimizer, scheduler, None, 0)
    return score_dict['f1_mavg']
Example #18
    def __init__(self, args, **kwargs):
        super(DecTrainer, self).__init__(args, **kwargs)

        # dataloader
        self.trainloader = get_dataloader(args, cfg, 'train')
        self.trainloader_val = get_dataloader(args, cfg, 'train_voc')
        self.valloader = get_dataloader(args, cfg, 'val')
        self.denorm = self.trainloader.dataset.denorm

        self.nclass = get_num_classes(args)
        self.classNames = get_class_names(args)[:-1]
        assert self.nclass == len(self.classNames)

        self.classIndex = {}
        for i, cname in enumerate(self.classNames):
            self.classIndex[cname] = i

        # model
        self.enc = get_model(cfg.GENERATOR, num_classes=self.nclass)
        self.criterion_cls = get_criterion(cfg.GENERATOR.LOSS)
        print(self.enc)

        # optimizer using different LR
        enc_params = self.enc.parameter_groups(cfg.GENERATOR.LR, cfg.GENERATOR.WEIGHT_DECAY)
        self.optim_enc = self.get_optim(enc_params, cfg.GENERATOR)

        # checkpoint management
        self._define_checkpoint('enc', self.enc, self.optim_enc)
        self._load_checkpoint(args.resume)

        self.fixed_batch = None
        self.fixed_batch_path = args.fixed_batch_path
        if os.path.isfile(self.fixed_batch_path):
            print("Loading fixed batch from {}".format(self.fixed_batch_path))
            self.fixed_batch = torch.load(self.fixed_batch_path)

        # using cuda
        self.enc = nn.DataParallel(self.enc).cuda()
        self.criterion_cls = nn.DataParallel(self.criterion_cls).cuda()
Example #19
def run(config, split, checkpoint_name, output_path):
    train_dir = config.train.dir

    task = get_task(config)
    checkpoint = utils.checkpoint.get_checkpoint(config, checkpoint_name)
    last_epoch, step = utils.checkpoint.load_checkpoint(
        task.get_model(), None, checkpoint)

    print('from checkpoint: {} last epoch:{}'.format(checkpoint, last_epoch))

    preprocess_opt = task.get_preprocess_opt()
    dataloader = get_dataloader(config, split,
                                get_transform(config, split, **preprocess_opt))

    df = inference(config, task, dataloader)
    df.to_csv(output_path, index=False)
Example #20
    def train(self, train_dataset, test_data, test_dataset, output_dir):
        tracker = LossTracker(output_dir)
        while self.res_idx < len(self.cfg['resolutions']):
            res = self.cfg['resolutions'][self.res_idx]
            self.set_optimizers_lr(self.cfg['learning_rates'][self.res_idx])
            batch_size = self.cfg['batch_sizes'][self.res_idx]
            batchs_in_phase = self.cfg['phase_lengths'][
                self.res_idx] // batch_size
            dataloader = EndlessDataloader(
                get_dataloader(train_dataset,
                               batch_size,
                               resize=res,
                               device=self.device))
            progress_bar = tqdm(range(batchs_in_phase * 2))
            for i in progress_bar:
                # the first half of the batches is the fade-in phase where alpha < 1; in the second half alpha = 1
                alpha = min(1.0, i / batchs_in_phase)
                batch_real_data = dataloader.next()
                self.perform_train_step(batch_real_data,
                                        tracker,
                                        log=(i % 10 == 0),
                                        calc_scores=(i % 100 == 0),
                                        valid_ds=test_dataset,
                                        final_resolution_idx=self.res_idx,
                                        alpha=alpha)

                self.train_step += 1
                progress_tag = f"gs-{self.train_step}_res-{self.res_idx}={res}x{res}_alpha-{alpha:.2f}"
                progress_bar.set_description(progress_tag)

                if self.train_step % self.cfg['dump_imgs_freq'] == 0:
                    tracker.plot()
                    dump_path = os.path.join(output_dir, 'images',
                                             f"{progress_tag}.jpg")
                    self.save_sample(dump_path,
                                     test_data[0],
                                     test_data[1],
                                     final_resolution_idx=self.res_idx,
                                     alpha=alpha)

                if self.train_step % self.cfg['checkpoint_freq'] == 0:
                    self.save_train_state(
                        os.path.join(output_dir, 'checkpoints',
                                     f"ckpt_{progress_tag}.pt"))
            self.res_idx += 1
        self.save_train_state(
            os.path.join(output_dir, 'checkpoints', f"ckpt_final.pt"))
Example #21
    def __init__(self, output_dir):

        # make dir for all kinds of output 
        self.model_dir = os.path.join(output_dir , 'Model')
        os.makedirs(self.model_dir)
        self.image_dir = os.path.join(output_dir , 'Image')
        os.makedirs(self.image_dir)

        # make dataloader 
        self.dataloader = get_dataloader()
 
        # other variables
        self.batch_size = cfg.TRAIN.BATCH_SIZE 

        # get fixed images used for comparison for each epoch 
        self.fixed_image = self.prepare_data(next(iter(self.dataloader)))[0]
        save_img_results(self.fixed_image.cpu(), None, -1, self.image_dir)
Example #22
    def train(self, train_dataset, test_data, output_dir):
        train_dataloader = get_dataloader(train_dataset,
                                          self.cfg['batch_size'],
                                          resize=None,
                                          device=self.device)
        tracker = LossTracker(output_dir)
        self.set_optimizers_lr(self.cfg['lr'])
        for epoch in range(self.cfg['epochs']):
            for batch_real_data in tqdm(train_dataloader):
                self.perform_train_step(batch_real_data, tracker)

            tracker.plot()
            dump_path = os.path.join(output_dir, 'images',
                                     f"epoch-{epoch}.jpg")
            self.save_sample(dump_path, test_data[0], test_data[1])

            self.save_train_state(os.path.join(output_dir, "last_ckp.pth"))
Example #23
def train_shadow_model(dataset,
                       model_type,
                       args,
                       epochs,
                       lr,
                       hidden_attribute,
                       class_distribution,
                       device,
                       size=2000,
                       filename='test'):
    dataloader = datasets.get_dataloader(dataset, hidden_attribute, size,
                                         class_distribution)
    net = utils.get_model(model_type).to(device)

    tic = time.time()
    criterion = nn.MSELoss(reduction='sum')
    optimizer = optim.Adam(net.parameters(), lr=lr)

    losses = []
    for epoch in range(epochs):
        running_loss = 0
        for i, data in enumerate(dataloader, 0):
            inputs, labels = data

            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            outputs = net(inputs).to(device)

            loss = criterion(outputs, labels.float())
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

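        # record the average per-batch loss for this epoch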
        losses.append(running_loss / (i + 1))

    tac = time.time()
    print(f'[{epoch+1}] loss: {running_loss/(i+1):.3f} - {int(tac-tic)} sec')

    path = os.path.join(args.models_dir, f'{filename}.pth')
    torch.save(net.state_dict(), path)
Example #24
def run(args, log):

    df = pd.read_csv(args.df_path)
    df_train = df[df['Fold']!=args.fold]
    df_valid = df[df['Fold']==args.fold]
    dfs = {}
    dfs['train'] = df_train
    dfs['val'] = df_valid
    
    model = get_model(args).cuda()
    
    if args.mode == 'segmentation':
        # train encoder/decoder, freeze the classification head
        for param in model.model.encoder.parameters():
            param.requires_grad = True
        for param in model.model.decoder.parameters():
            param.requires_grad = True
        for param in model.model.classification_head.parameters():
            param.requires_grad = False

    elif args.mode == 'classification':
        # freeze encoder/decoder, train only the classification head
        for param in model.model.encoder.parameters():
            param.requires_grad = False
        for param in model.model.decoder.parameters():
            param.requires_grad = False
        for param in model.model.classification_head.parameters():
            param.requires_grad = True

    criterion = get_loss(args)
    optimizer = get_optimizer(args, model)
    
    if args.initial_ckpt is not None:
        last_epoch, step = checkpoint.load_checkpoint(args, model, checkpoint=args.initial_ckpt)
        log.write(f'Resume training from {args.initial_ckpt} @ {last_epoch}\n')
    else:
        last_epoch, step = -1, -1
    
    dataloaders = {mode: get_dataloader(args.data_dir, dfs[mode], mode, args.pretrain, args.batch_size)
                   for mode in ['train', 'val']}
    seed_everything(seed=123)
    clr = CLR(optimizer, len(dataloaders['train']))

    train(args, model, dataloaders['train'], criterion, optimizer, clr)
Example #25
def run(args):
    df = pd.read_csv(args.df_path)
    df_train = df[df['fold'] != args.fold]

    model = get_model(args).cuda()
    dataloader = get_dataloader(args.data_dir, df_train, 'train',
                                args.pretrain, args.batch_size)
    checkpoints = get_checkpoints(args)

    checkpoint.load_checkpoint(
        args, model, None, checkpoint=checkpoints[0]
    )  # args, model, ckpt_name, checkpoint=None, optimizer=None
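    # fold each later checkpoint into the first: equal weights (1/(i+2)) for SWA, a fixed decay for EMA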
    for i, ckpt in enumerate(checkpoints[1:]):
        print(i, ckpt)
        model2 = get_model(args).cuda()
        last_epoch, _ = checkpoint.load_checkpoint(args,
                                                   model2,
                                                   None,
                                                   checkpoint=ckpt)
        if args.ema is None:
            swa.moving_average(model, model2, 1. / (i + 2))
        else:
            swa.moving_average(model, model2, args.ema)

    with torch.no_grad():
        swa.bn_update(dataloader, model)

    if args.ema is not None:
        output_name = f'model_ema_{len(checkpoints)}'
    else:
        output_name = f'model_swa_{len(checkpoints)}'

    print('save {}'.format(output_name))

    checkpoint.save_checkpoint(args,
                               model,
                               None,
                               0,
                               0,
                               name=output_name,
                               weights_dict={'state_dict': model.state_dict()})
Example #26
def run(config):
    train_group_csv_dir = './data/group_csv/'
    writer = SummaryWriter(config.train.dir)
    train_filenames = list(glob.glob(os.path.join(train_group_csv_dir, 'data_train_group_*')))[1:]
    
    for ti, train_file in tqdm.tqdm(enumerate(train_filenames)):
        gi_tr = train_file.replace('data_train_group_', '')
        gi_tr = gi_tr.split('/')[-1]
        gi_tr = gi_tr.replace('.csv', '')
        group_idx = int(gi_tr)
        
        utils.prepare_train_directories(config, group_idx)
        
        model = get_model(config, group_idx)
        if torch.cuda.is_available():
            model = model.cuda()
        criterion = get_loss(config)
        optimizer = get_optimizer(config, model.parameters())

        checkpoint = utils.checkpoint.get_initial_checkpoint(config, group_idx)
        if checkpoint is not None:
            last_epoch, step = utils.checkpoint.load_checkpoint(model, optimizer, checkpoint)
        else:
            last_epoch, step = -1, -1

        if last_epoch > config.train.num_epochs:
            print('group -- ', str(group_idx), '-- index-', ti, '  -- already trained, skipping')
            continue
        print('from checkpoint: {} last epoch:{}'.format(checkpoint, last_epoch))
        print('group -- ', str(group_idx), '-- index-', ti)
        scheduler = get_scheduler(config, optimizer, last_epoch)

        dataloaders = {split: get_dataloader(config, group_idx, split, get_transform(config, split))
                       for split in ['train', 'val']}

        train(config, group_idx, model, dataloaders, criterion, optimizer, scheduler,
              writer, last_epoch + 1)
Example #27
def inference(config, model, split, src_file, tta, output_filename=None):
    if split == 'test':
        data_path = '../data/dicom-images-test'
    else:
        data_path = '../data/dicom-images-train'

    dataset = CustomTestDataset(data_path, src_file, split, config[INPUT_SIZE])
    dataloader = get_dataloader(dataset, 1)

    model = model.cuda()
    model.eval()

    key_list = []
    probability_list = []

    with torch.no_grad():
        total_step = len(dataloader)
        for i, (images, id) in tqdm.tqdm(enumerate(dataloader), total=total_step):
            images = torch.cat(images, dim=0)
            images = images.cuda()

            logits = model(images)
            mean_logits = torch.mean(logits, dim=0, keepdim=True)
            probabilities = torch.softmax(mean_logits, dim=-1)
            probability_list.append(probabilities.cpu().numpy())

            key_list.extend(id)

        probabilities = np.concatenate(probability_list, axis=0)
        assert probabilities.shape[-1] == NUM_CLASSES

        records = []
        for id, probability in zip(key_list, probabilities):
            records.append(tuple([id] + ['{:.04f}'.format(p) for p in probability]))

        columns = ['id'] + ['P{:04d}'.format(l) for l in range(NUM_CLASSES)]

        df = pd.DataFrame.from_records(records, columns=columns)
        print('save {}'.format(output_filename))
        df.to_csv(output_filename, index=False)
Example #28
def train():
    logging.info("==========loading data==========")
    train_data, valid_data, test_data = get_dataloader(Config)
    logging.info("==========end==========")

    logging.info("==========loading model==========")
    model = getattr(models, Config.model.name)(Config.model.num_class)
    logging.info("==========end==========")
    optimizer = getattr(optim,
                        Config.train.optimizer)(model.parameters(),
                                                lr=Config.train.lr,
                                                weight_decay=Config.train.wd,
                                                momentum=Config.train.momentum)
    ce_loss = nn.CrossEntropyLoss()
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer)  # ReduceLROnPlateau requires the optimizer instance
    model = model.cuda()
    model_solver = Solver(Config, model)
    model_solver.fit(train_data=train_data,
                     valid_data=valid_data,
                     optimizer=optimizer,
                     criterion=ce_loss,
                     lr_schduler=lr_scheduler)
Example #29
    def __init__(self, output_dir):

        # make dir for all kinds of output 
        self.model_dir = os.path.join(output_dir , 'Model')
        os.makedirs(self.model_dir)
        self.image_dir = os.path.join(output_dir , 'Image')
        os.makedirs(self.image_dir)
        self.opt_dir = os.path.join(output_dir , 'Opt')
        os.makedirs(self.opt_dir)
        self.output_csv_file = os.path.join(output_dir , 'losses.csv')

        # make dataloader and code buffer 
        self.dataloader = get_dataloader()
      
        # other variables
        self.batch_size = cfg.TRAIN.BATCH_SIZE 
        self.patch_stride = 4.0 
        self.n_out = 24
        self.recp_field = 34       

        # get fixed images used for comparison for each epoch 
        self.fixed_image = self.prepare_data(next(iter(self.dataloader)))[1]
        save_img_results(self.fixed_image.cpu(), None, -1, self.image_dir)
Example #30
def submit(args, log):
    df = pd.read_csv(args.df_path)
    df['Image'] = df.Image_Label.map(lambda v: v[:v.find('_')])
    print(df.head())

    model = get_model(args).cuda()
    last_epoch, step = checkpoint.load_checkpoint(args,
                                                  model,
                                                  checkpoint=args.initial_ckpt)
    log.write(f'Loaded checkpoint from {args.initial_ckpt} @ {last_epoch}\n')

    dataloader = get_dataloader(args.data_dir, df, 'test', args.pretrain,
                                args.batch_size)
    seed_everything()

    # inference
    test_ids, mask_predictions = inference_submit(model, dataloader,
                                                  args.tta_augment)

    assert len(test_ids) == mask_predictions.shape[0]

    ids = []
    rles = []
    for i, image_id in tqdm.tqdm(enumerate(test_ids), total=len(test_ids)):
        predictions = mask_predictions[i]
        for cls_idx in range(4):
            prediction = predictions[cls_idx, :, :]
            H, W = prediction.shape
            assert H == 350 and W == 525
            rle_encoded = mask2rle(prediction)
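            # sanity check: decoding the RLE must reproduce the binary mask exactly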
            assert np.all(rle2mask(H, W, rle_encoded) == prediction)
            ids.append(f'{image_id}_{LABEL_LIST[cls_idx]}')
            rles.append(rle_encoded)

    df_submission = pd.DataFrame({'Image_Label': ids, 'EncodedPixels': rles})
    df_submission.to_csv(args.sub_name, index=False)
    print(df_submission.head())