Example #1
def train_mlp(args):
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    assert args.cae_weight, "No trained cae weight"
    cae = CAE().to(device)
    cae.load_state_dict(torch.load(args.cae_weight, map_location=device))
    cae.eval()

    print('Building train/validation datasets with the CAE encoder...')
    train_dataset = PathDataSet(S2D_data_path, cae.encoder)
    val_dataset = PathDataSet(S2D_data_path, cae.encoder, is_val=True)

    print('Creating data loaders...')
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True)
    val_loader = DataLoader(val_dataset,
                            batch_size=args.batch_size,
                            shuffle=False)

    now = datetime.now()
    output_folder = args.output_folder + '/' + now.strftime(
        '%Y-%m-%d_%H-%M-%S')
    check_and_create_dir(output_folder)

    model = MLP(args.input_size, args.output_size).to(device)
    if args.load_weights:
        print("Load weight from {}".format(args.load_weights))
        model.load_state_dict(torch.load(args.load_weights))

    criterion = nn.MSELoss()
    # optimizer = torch.optim.Adagrad(model.parameters())
    optimizer = AdaBelief(model.parameters(),
                          lr=1e-4,
                          eps=1e-10,
                          betas=(0.9, 0.999),
                          weight_decouple=True,
                          rectify=False)

    for epoch in range(args.max_epoch):
        model.train()

        for i, data in enumerate(tqdm(train_loader)):
            # get data
            input_data = data[0].to(device)  # B, 32
            next_config = data[1].to(device)  # B, 2

            # predict
            predict_config = model(input_data)

            # get loss
            loss = criterion(predict_config, next_config)

            # backpropagation
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
            optimizer.step()

            neptune.log_metric("batch_loss", loss.item())

        print('\ncalculate validation accuracy..')

        model.eval()
        with torch.no_grad():
            losses = []
            for i, data in enumerate(tqdm(val_loader)):
                # get data
                input_data = data[0].to(device)  # B, 32
                next_config = data[1].to(device)  # B, 2

                # predict
                predict_config = model(input_data)

                # get loss
                loss = criterion(predict_config, next_config)

                losses.append(loss.item())

            val_loss = np.mean(losses)
            neptune.log_metric("val_loss", val_loss)

        print("validation result, epoch {}: {}".format(epoch, val_loss))
        if epoch % 5 == 0:
            torch.save(model.state_dict(),
                       '{}/epoch_{}.tar'.format(output_folder, epoch))
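A minimal sketch of the argument parser this function appears to expect. Only the attribute names are taken from train_mlp itself; the flags, defaults, and the parse_args helper are assumptions.

import argparse

def parse_args():
    # Hypothetical CLI wrapper for train_mlp; defaults are placeholders.
    parser = argparse.ArgumentParser()
    parser.add_argument('--cae_weight', type=str, required=True,
                        help='path to the trained CAE checkpoint')
    parser.add_argument('--load_weights', type=str, default=None,
                        help='optional MLP checkpoint to resume from')
    parser.add_argument('--output_folder', type=str, default='./output')
    parser.add_argument('--batch_size', type=int, default=256)
    parser.add_argument('--input_size', type=int, default=32)
    parser.add_argument('--output_size', type=int, default=2)
    parser.add_argument('--max_epoch', type=int, default=100)
    return parser.parse_args()

if __name__ == '__main__':
    train_mlp(parse_args())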
Example #2
def train_loop(folds, fold):

    if CFG.device == 'GPU':
        LOGGER.info(f"========== fold: {fold} training ==========")
    elif CFG.device == 'TPU':
        if CFG.nprocs == 1:
            LOGGER.info(f"========== fold: {fold} training ==========")
        elif CFG.nprocs == 8:
            xm.master_print(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    trn_idx = folds[folds['fold'] != fold].index
    val_idx = folds[folds['fold'] == fold].index

    train_folds = folds.loc[trn_idx].reset_index(drop=True)
    valid_folds = folds.loc[val_idx].reset_index(drop=True)

    train_folds = train_folds[train_folds['StudyInstanceUID'].isin(
        train_annotations['StudyInstanceUID'].unique())].reset_index(drop=True)

    valid_labels = valid_folds[CFG.target_cols].values

    train_dataset = TrainDataset(train_folds,
                                 train_annotations,
                                 use_annot=True,
                                 transform=get_transforms(data='train'))
    valid_dataset = TrainDataset(valid_folds,
                                 train_annotations,
                                 use_annot=False,
                                 transform=get_transforms(data='valid'))

    if CFG.device == 'GPU':
        train_loader = DataLoader(train_dataset,
                                  batch_size=CFG.batch_size,
                                  shuffle=True,
                                  num_workers=CFG.num_workers,
                                  pin_memory=True,
                                  drop_last=True)
        valid_loader = DataLoader(valid_dataset,
                                  batch_size=CFG.batch_size * 2,
                                  shuffle=False,
                                  num_workers=CFG.num_workers,
                                  pin_memory=True,
                                  drop_last=False)

    elif CFG.device == 'TPU':
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset,
            num_replicas=xm.xrt_world_size(),
            rank=xm.get_ordinal(),
            shuffle=True)
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=CFG.batch_size,
                                                   sampler=train_sampler,
                                                   drop_last=True,
                                                   num_workers=CFG.num_workers)

        valid_sampler = torch.utils.data.distributed.DistributedSampler(
            valid_dataset,
            num_replicas=xm.xrt_world_size(),
            rank=xm.get_ordinal(),
            shuffle=False)
        valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                                   batch_size=CFG.batch_size *
                                                   2,
                                                   sampler=valid_sampler,
                                                   drop_last=False,
                                                   num_workers=CFG.num_workers)

    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(optimizer):
        if CFG.scheduler == 'ReduceLROnPlateau':
            scheduler = ReduceLROnPlateau(optimizer,
                                          mode='min',
                                          factor=CFG.factor,
                                          patience=CFG.patience,
                                          verbose=True,
                                          eps=CFG.eps)
        elif CFG.scheduler == 'CosineAnnealingLR':
            scheduler = CosineAnnealingLR(optimizer,
                                          T_max=CFG.T_max,
                                          eta_min=CFG.min_lr,
                                          last_epoch=-1)
        elif CFG.scheduler == 'CosineAnnealingWarmRestarts':
            scheduler = CosineAnnealingWarmRestarts(optimizer,
                                                    T_0=CFG.T_0,
                                                    T_mult=1,
                                                    eta_min=CFG.min_lr,
                                                    last_epoch=-1)
        else:
            raise NotImplementedError(f'Unknown scheduler: {CFG.scheduler}')
        return scheduler

    # ====================================================
    # model & optimizer
    # ====================================================
    if CFG.device == 'TPU':
        device = xm.xla_device()
    elif CFG.device == 'GPU':
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    teacher_model = CustomSeResNet152D(CFG.model_name, pretrained=False)
    teacher_model.to(device)
    state = torch.load(CFG.teacher)
    teacher_model.load_state_dict(state['model'])
    for param in teacher_model.parameters():
        param.requires_grad = False
    teacher_model.eval()
    #     teacher_model.to(device)

    model = CustomSeResNet152D_WLF(CFG.model_name, pretrained=True)
    model.to(device)
    #     state = torch.load(CFG.student)
    #     model.load_state_dict(state['model'])

    optimizer = AdaBelief(model.parameters(),
                          lr=CFG.lr,
                          weight_decay=CFG.weight_decay)
    scheduler = get_scheduler(optimizer)

    # ====================================================
    # loop
    # ====================================================
    train_criterion = CustomLoss(weights=CFG.weights)
    valid_criterion = nn.BCEWithLogitsLoss()

    best_score = 0.
    best_loss = np.inf

    for epoch in range(CFG.epochs):

        start_time = time.time()

        # train
        if CFG.device == 'TPU':
            if CFG.nprocs == 1:
                avg_loss = train_fn(train_loader, teacher_model, model,
                                    train_criterion, optimizer, epoch,
                                    scheduler, device)
            elif CFG.nprocs == 8:
                para_train_loader = pl.ParallelLoader(train_loader, [device])
                avg_loss = train_fn(
                    para_train_loader.per_device_loader(device), teacher_model,
                    model, train_criterion, optimizer, epoch, scheduler,
                    device)
        elif CFG.device == 'GPU':
            avg_loss = train_fn(train_loader, teacher_model, model,
                                train_criterion, optimizer, epoch, scheduler,
                                device)

        # eval
        if CFG.device == 'TPU':
            if CFG.nprocs == 1:
                avg_val_loss, preds, _ = valid_fn(valid_loader, model,
                                                  valid_criterion, device)
            elif CFG.nprocs == 8:
                para_valid_loader = pl.ParallelLoader(valid_loader, [device])
                avg_val_loss, preds, valid_labels = valid_fn(
                    para_valid_loader.per_device_loader(device), model,
                    valid_criterion, device)
                preds = idist.all_gather(torch.tensor(preds)).to('cpu').numpy()
                valid_labels = idist.all_gather(
                    torch.tensor(valid_labels)).to('cpu').numpy()
        elif CFG.device == 'GPU':
            avg_val_loss, preds, _ = valid_fn(valid_loader, model,
                                              valid_criterion, device)

        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(avg_val_loss)
        elif isinstance(scheduler, CosineAnnealingLR):
            scheduler.step()
        elif isinstance(scheduler, CosineAnnealingWarmRestarts):
            scheduler.step()

        # scoring
        score, scores = get_score(valid_labels, preds)

        elapsed = time.time() - start_time

        if CFG.device == 'GPU':
            LOGGER.info(
                f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s'
            )
            LOGGER.info(
                f'Epoch {epoch+1} - Score: {score:.4f}  Scores: {np.round(scores, decimals=4)}'
            )
        elif CFG.device == 'TPU':
            if CFG.nprocs == 1:
                LOGGER.info(
                    f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s'
                )
                LOGGER.info(
                    f'Epoch {epoch+1} - Score: {score:.4f}  Scores: {np.round(scores, decimals=4)}'
                )
            elif CFG.nprocs == 8:
                xm.master_print(
                    f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s'
                )
                xm.master_print(
                    f'Epoch {epoch+1} - Score: {score:.4f}  Scores: {np.round(scores, decimals=4)}'
                )

        if score > best_score:
            best_score = score
            if CFG.device == 'GPU':
                LOGGER.info(
                    f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model'
                )
                torch.save({
                    'model': model.state_dict(),
                    'preds': preds
                }, OUTPUT_DIR + f'{CFG.model_name}_fold{fold}_best_score.pth')
            elif CFG.device == 'TPU':
                if CFG.nprocs == 1:
                    LOGGER.info(
                        f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model'
                    )
                elif CFG.nprocs == 8:
                    xm.master_print(
                        f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model'
                    )
                xm.save({
                    'model': model,
                    'preds': preds
                }, OUTPUT_DIR + f'{CFG.model_name}_fold{fold}_best_score.pth')

        if avg_val_loss < best_loss:
            best_loss = avg_val_loss
            if CFG.device == 'GPU':
                LOGGER.info(
                    f'Epoch {epoch+1} - Save Best Loss: {best_loss:.4f} Model')
                torch.save({
                    'model': model.state_dict(),
                    'preds': preds
                }, OUTPUT_DIR + f'{CFG.model_name}_fold{fold}_best_loss.pth')
            elif CFG.device == 'TPU':
                if CFG.nprocs == 1:
                    LOGGER.info(
                        f'Epoch {epoch+1} - Save Best Loss: {best_loss:.4f} Model'
                    )
                elif CFG.nprocs == 8:
                    xm.master_print(
                        f'Epoch {epoch+1} - Save Best Loss: {best_loss:.4f} Model'
                    )
                xm.save({
                    'model': model,
                    'preds': preds
                }, OUTPUT_DIR + f'{CFG.model_name}_fold{fold}_best_loss.pth')

#         # save every epoch's weights for inference
#         if CFG.device == 'TPU':
#             xm.save({'model': model.state_dict()}, OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_epoch{epoch+1}.pth')
#         elif CFG.device == 'GPU':
#             torch.save({'model': model.state_dict()}, OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_epoch{epoch+1}.pth')

        if CFG.nprocs != 8:
            check_point = torch.load(
                OUTPUT_DIR + f'{CFG.model_name}_fold{fold}_best_score.pth')
            for c in [f'pred_{c}' for c in CFG.target_cols]:
                valid_folds[c] = np.nan
            valid_folds[[f'pred_{c}'
                         for c in CFG.target_cols]] = check_point['preds']

    return valid_folds
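A hedged sketch of how train_loop might be driven across folds on GPU to assemble an out-of-fold dataframe. CFG.n_fold, OUTPUT_DIR and the layout of the `folds` dataframe are assumptions not shown in this excerpt.

import pandas as pd

def run_all_folds(folds):
    # Hypothetical driver: assumes folds has a 'fold' column and CFG.n_fold is defined.
    oof_df = pd.DataFrame()
    for fold in range(CFG.n_fold):
        _oof_df = train_loop(folds, fold)
        oof_df = pd.concat([oof_df, _oof_df]).reset_index(drop=True)
    oof_df.to_csv(OUTPUT_DIR + 'oof_df.csv', index=False)
    return oof_df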
Example #3
    def __init__(self,
                 *,
                 latent_dim,
                 image_size,
                 optimizer="adam",
                 fmap_max=512,
                 fmap_inverse_coef=12,
                 transparent=False,
                 greyscale=False,
                 disc_output_size=5,
                 attn_res_layers=[],
                 sle_spatial=False,
                 ttur_mult=1.,
                 lr=2e-4,
                 rank=0,
                 ddp=False):
        super().__init__()
        self.latent_dim = latent_dim
        self.image_size = image_size

        G_kwargs = dict(image_size=image_size,
                        latent_dim=latent_dim,
                        fmap_max=fmap_max,
                        fmap_inverse_coef=fmap_inverse_coef,
                        transparent=transparent,
                        greyscale=greyscale,
                        attn_res_layers=attn_res_layers,
                        use_sle_spatial=sle_spatial)

        self.G = Generator(**G_kwargs)

        self.D = Discriminator(image_size=image_size,
                               fmap_max=fmap_max,
                               fmap_inverse_coef=fmap_inverse_coef,
                               transparent=transparent,
                               greyscale=greyscale,
                               attn_res_layers=attn_res_layers,
                               disc_output_size=disc_output_size)

        self.ema_updater = EMA(0.995)
        self.GE = Generator(**G_kwargs)
        set_requires_grad(self.GE, False)

        if optimizer == "adam":
            self.G_opt = Adam(self.G.parameters(), lr=lr, betas=(0.5, 0.9))
            self.D_opt = Adam(self.D.parameters(),
                              lr=lr * ttur_mult,
                              betas=(0.5, 0.9))
        elif optimizer == "adabelief":
            self.G_opt = AdaBelief(self.G.parameters(),
                                   lr=lr,
                                   betas=(0.5, 0.9))
            self.D_opt = AdaBelief(self.D.parameters(),
                                   lr=lr * ttur_mult,
                                   betas=(0.5, 0.9))
        else:
            assert False, "No valid optimizer is given"

        self.apply(self._init_weights)
        self.reset_parameter_averaging()

        self.cuda(rank)
        self.D_aug = AugWrapper(self.D, image_size)
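The constructor above relies on two small helpers, set_requires_grad and EMA, defined elsewhere in the project. A minimal sketch assuming the conventional semantics (freeze/unfreeze parameters; exponential moving average of generator weights); the author's actual implementations may differ.

def set_requires_grad(module, flag):
    # Enable or disable gradients for every parameter of a module.
    for p in module.parameters():
        p.requires_grad = flag

class EMA:
    # Exponential moving average updater used for the GE copy of the generator.
    def __init__(self, beta):
        self.beta = beta

    def update_average(self, old, new):
        if old is None:
            return new
        return old * self.beta + (1 - self.beta) * new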
Example #4
def get_optimizer(model, optimizer_name, optimizer_params, scheduler_name,
                  scheduler_params, n_epochs):
    opt_lower = optimizer_name.lower()

    opt_look_ahed = optimizer_params["lookahead"]
    if opt_lower == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=optimizer_params["lr"],
                              momentum=optimizer_params["momentum"],
                              weight_decay=optimizer_params["weight_decay"],
                              nesterov=True)
    elif opt_lower == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=optimizer_params["lr"],
                               betas=(0.9, 0.999),
                               eps=1e-08,
                               weight_decay=0)
    elif opt_lower == 'adamw':
        optimizer = torch.optim.AdamW(
            model.parameters(),
            lr=optimizer_params["lr"],
            weight_decay=optimizer_params["weight_decay"],
            eps=optimizer_params["opt_eps"])
    elif opt_lower == 'nadam':
        optimizer = torch.optim.NAdam(
            model.parameters(),
            lr=optimizer_params["lr"],
            weight_decay=optimizer_params["weight_decay"],
            eps=optimizer_params["opt_eps"])
    elif opt_lower == 'radam':
        optimizer = RAdam(model.parameters(),
                          lr=optimizer_params["lr"],
                          weight_decay=optimizer_params["weight_decay"],
                          eps=optimizer_params["opt_eps"])
    elif opt_lower == "adabelief":
        optimizer = AdaBelief(model.parameters(),
                              lr=optimizer_params["lr"],
                              eps=1e-8,
                              weight_decay=optimizer_params["weight_decay"])

    elif opt_lower == "adamp":
        optimizer = AdamP(model.parameters(),
                          lr=optimizer_params["lr"],
                          weight_decay=optimizer_params["weight_decay"])
    else:
        assert False and "Invalid optimizer"
        raise ValueError

    if opt_look_ahed:
        optimizer = Lookahead(optimizer, alpha=0.5, k=5)

    if scheduler_name == "CosineAnnealingWarmRestarts":
        scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer,
            eta_min=scheduler_params["eta_min"],
            T_0=scheduler_params["T_0"],
            T_mult=scheduler_params["T_multi"],
        )
    elif scheduler_name == "WarmRestart":
        scheduler = WarmRestart(optimizer,
                                T_max=scheduler_params["T_max"],
                                T_mult=scheduler_params["T_mul"],
                                eta_min=scheduler_params["eta_min"])
    elif scheduler_name == "MultiStepLR":
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer,
            milestones=scheduler_params["schedule"],
            gamma=scheduler_params["gamma"])
    if scheduler_params["warmup_factor"] > 0:
        scheduler = GradualWarmupSchedulerV2(
            optimizer,
            multiplier=scheduler_params["warmup_factor"],
            total_epoch=1,
            after_scheduler=scheduler)

    return optimizer, scheduler
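A hedged usage sketch of get_optimizer. Only the dictionary keys that the function actually reads are taken from the code above; the concrete values, and the `model` variable, are placeholders.

optimizer_params = {
    "lr": 3e-4,
    "momentum": 0.9,
    "weight_decay": 1e-4,
    "opt_eps": 1e-8,
    "lookahead": False,
}
scheduler_params = {
    "eta_min": 1e-6,
    "T_0": 10,
    "T_multi": 1,
    "warmup_factor": 0,
}
# model is assumed to be an nn.Module defined elsewhere.
optimizer, scheduler = get_optimizer(model,
                                     optimizer_name="adabelief",
                                     optimizer_params=optimizer_params,
                                     scheduler_name="CosineAnnealingWarmRestarts",
                                     scheduler_params=scheduler_params,
                                     n_epochs=30)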
Example #5
def main():
    """Model training."""
    train_speakers, valid_speakers = get_valid_speakers()

    # define transforms for train & validation samples
    train_transform = Compose([Resize(760, 80), ToTensor()])

    # define datasets & loaders
    train_dataset = TrainDataset('train',
                                 train_speakers,
                                 transform=train_transform)
    valid_dataset = TrainDataset('train',
                                 valid_speakers,
                                 transform=train_transform)

    train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=256, shuffle=False)

    device = get_device()
    print(f'Selected device: {device}')

    model = torch.hub.load('huawei-noah/ghostnet',
                           'ghostnet_1x',
                           pretrained=True)
    model.classifier = nn.Linear(in_features=1280, out_features=1, bias=True)

    net = model
    net.to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = AdaBelief(net.parameters(), lr=1e-3)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     mode='min',
                                                     factor=0.2,
                                                     patience=3,
                                                     eps=1e-4,
                                                     verbose=True)

    # prepare valid target
    yvalid = get_valid_targets(valid_dataset)

    # training loop
    loss_log = {'train': [], 'valid': []}
    for epoch in range(10):
        train_loss = []

        net.train()
        for x, y in tqdm(train_loader):
            x, y = mixup(x, y, alpha=0.2)
            x, y = x.to(device), y.to(device, dtype=torch.float32)
            optimizer.zero_grad()
            outputs = net(x)

            loss = criterion(outputs, y.unsqueeze(1))
            loss.backward()
            optimizer.step()

            # save loss
            train_loss.append(loss.item())

        # evaluate
        net.eval()
        valid_pred = torch.Tensor([]).to(device)

        for x, y in valid_loader:
            with torch.no_grad():
                x, y = x.to(device), y.to(device, dtype=torch.float32)
                ypred = net(x)
                valid_pred = torch.cat([valid_pred, ypred], 0)

        valid_pred = sigmoid(valid_pred.cpu().numpy())
        val_loss = log_loss(yvalid, valid_pred, eps=1e-7)
        val_acc = (yvalid == (valid_pred > 0.5).astype(int).flatten()).mean()
        tqdm.write(
            f'Epoch {epoch} train_loss={np.mean(train_loss):.4f}; val_loss={val_loss:.4f}; val_acc={val_acc:.4f}'
        )

        loss_log['train'].append(np.mean(train_loss))
        loss_log['valid'].append(val_loss)
        scheduler.step(loss_log['valid'][-1])

    torch.save(net.state_dict(), 'ghostnet_model.pt')
    print('Training is complete.')
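mixup is called above but not defined in this excerpt. Below is a minimal sketch of one common implementation consistent with the call x, y = mixup(x, y, alpha=0.2), producing blended inputs and soft targets suitable for BCEWithLogitsLoss; the author's exact variant may differ.

import numpy as np
import torch

def mixup(x, y, alpha=0.2):
    # Sample a mixing coefficient and blend both inputs and targets
    # with a randomly permuted copy of the batch.
    lam = np.random.beta(alpha, alpha)
    index = torch.randperm(x.size(0))
    mixed_x = lam * x + (1 - lam) * x[index]
    mixed_y = lam * y + (1 - lam) * y[index]
    return mixed_x, mixed_y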
Example #6
# make environment
env = gym.make(ENV_NAME)

# Setup policy, optimizer and criterion
hid_size = 256
n_layers = 4

ac_kwargs = dict(hidden_sizes=[hid_size] * n_layers)
clone_pi = GaussianActor(env.observation_space.shape[0], env.action_space.shape[0], activation=nn.LeakyReLU, **ac_kwargs)

distilled_clone_pi = DistilledGaussianActor(env.observation_space.shape[0], env.action_space.shape[0],
                                            activation=nn.LeakyReLU, n_experts=2, **ac_kwargs)

# Optimizer and criterion for ordinary clone
pi_optimizer = AdaBelief(clone_pi.parameters(), betas=(0.9, 0.999), eps=1e-16)
criterion = nn.MSELoss()

# Optimizer and criterion for distilled clone
distilled_pi_optimizer = AdaBelief(distilled_clone_pi.parameters(), betas=(0.9, 0.999), eps=1e-16)
distilled_criterion = nn.MSELoss()
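A hedged sketch of a single supervised behavioral-cloning update for the ordinary clone, using the optimizer and criterion set up above. It assumes clone_pi(obs) returns the predicted (mean) action; the real GaussianActor interface is outside this excerpt and may differ.

def clone_update(obs, expert_act):
    # obs, expert_act: tensors of expert observations and the actions taken.
    pi_optimizer.zero_grad()
    pred_act = clone_pi(obs)            # assumed to yield the predicted action
    loss = criterion(pred_act, expert_act)
    loss.backward()
    pi_optimizer.step()
    return loss.item()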


####################################################################################

# Create dual clone

config_name_list = ['marigold', 'rose']

marigold_clone_distill = DistillBehavioralClone(config_name_list=config_name_list,
                                                config_name='marigold',