def create_trainer():
    model = Baseline(bert_vocab_num=24000,
                     emb_dim=300,
                     hidden_dim=256,
                     output_dim=3).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
    criterion = torch.nn.CrossEntropyLoss()
    # criterion = FocalLoss(num_classes=3)

    trainer = Trainer(model, optimizer, criterion, NUM_EPOCH, device)
    return trainer
Example #2
def run(dataset_train, dataset_dev, dataset_test, model_type, word_embed_size,
        hidden_size, batch_size, use_cuda, n_epochs):

    if model_type == 'base':
        model = Baseline(vocab=dataset_train.vocab,
                         word_embed_size=word_embed_size,
                         hidden_size=hidden_size,
                         use_cuda=use_cuda,
                         inference=False)
    else:
        raise NotImplementedError
    if use_cuda:
        model = model.cuda()

    optim_params = model.parameters()
    optimizer = optim.Adam(optim_params, lr=10**-3)

    print('start training')
    for epoch in range(n_epochs):
        train_loss, tokens, preds, golds = train(dataset_train, model,
                                                 optimizer, batch_size, epoch,
                                                 Phase.TRAIN, use_cuda)

        dev_loss, tokens, preds, golds = train(dataset_dev, model, optimizer,
                                               batch_size, epoch, Phase.DEV,
                                               use_cuda)
        logger = '\t'.join([
            'epoch {}'.format(epoch + 1),
            'TRAIN Loss: {:.9f}'.format(train_loss),
            'DEV Loss: {:.9f}'.format(dev_loss)
        ])
        print('\r' + logger, end='')
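    # final evaluation: reuse the shared train() routine in TEST phase on the held-out test set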
    test_loss, tokens, preds, golds = train(dataset_test, model, optimizer,
                                            batch_size, epoch, Phase.TEST,
                                            use_cuda)
    print('\n==== TEST ====')
    print_scores(preds, golds)
    output_results(tokens, preds, golds)
Example #3
#%%
data, label = load_data(data_path, label_path, 'indian_pines')
#%%
get_value_data(data, label)
#%%
DATA = pd.read_csv('datasets/Indian_pines.csv', header=None).values
data_D = DATA[:, :-1]
data_L = DATA[:, -1]
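# hold out 80% of the labeled pixels for validation; only 20% are used for training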
data_train, data_test, label_train, label_test = train_test_split(
    data_D, data_L, test_size=0.8)
#%%
train_set = GetLoader(data_train, label_train)
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
val_set = GetLoader(data_test, label_test)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)
#%%
data_p, label_p = next(iter(train_loader))
# print(data_p[:-1])
#%%
net = Baseline(INPUT_CHANNELS, CLASSES, dropout=False)
optimizer = optim.Adam(net.parameters(), lr=0.0001)
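# zero the weight of class 0 (presumably the unlabeled background in Indian Pines) so it is ignored by the loss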
weight = torch.ones(CLASSES)
weight[torch.LongTensor([0])] = 0.
w = weight.to(DEVICE)
criterion = nn.CrossEntropyLoss(weight=w)
#%%
train_loss, val_accuracy = train(net, optimizer, criterion, train_loader,
                                 val_loader, EPOCH, DEVICE)

plot_curve(train_loss)
plot_curve(val_accuracy)
Example #4
                                               num_workers=0,
                                               shuffle=True)
    return train_loader


if __name__ == "__main__":
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    epochs = 100
    best_val_loss = 999999
    # Assuming that we are on a CUDA machine, this should print a CUDA device:
    print(device)
    net = Baseline()
    net.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.005, momentum=0.9)
    for epoch in range(epochs):
        with tqdm(total=len(load_dataset(train_path))) as epoch_pbar:
            epoch_pbar.set_description(f'Epoch {epoch}')
            running_loss = 0.0
            running_val_loss = 0.0
            for i, data in enumerate(load_dataset(train_path)):
                # get the inputs; data is a list of [inputs, labels]
                inputs = data[0].to(device)
                labels = data[1].to(device)
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                running_loss += loss.item()
                # zero the parameter gradients
                optimizer.zero_grad()
                # forward + backward + optimize
Example #5
    report = parser.report(end='<br>')
    vis.text(report, win='report f{}'.format(FG.cur_fold))

    torch.cuda.set_device(FG.devices[0])
    device = torch.device(FG.devices[0])

    net = Baseline(FG.ckpt_dir, len(FG.labels))
    # net = Baseline3D(FG.ckpt_dir, len(FG.labels))

    if len(FG.devices) > 1:
        net = torch.nn.DataParallel(net, device_ids=FG.devices)
        print(net.module)
    else:
        print(net)

    optimizer = Adam(net.parameters(), lr=FG.lr, weight_decay=FG.l2_decay)
    scheduler = ExponentialLR(optimizer, gamma=FG.lr_gamma)

    trainloader, testloader = get_dataloader(k=FG.fold,
                                             cur_fold=FG.cur_fold,
                                             modality=FG.modality,
                                             axis=FG.axis,
                                             labels=FG.labels,
                                             batch_size=FG.batch_size)

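    # build a supervised trainer (ignite-style API) that steps the optimizer against F.cross_entropy on `device`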
    trainer = create_supervised_trainer(net,
                                        optimizer,
                                        F.cross_entropy,
                                        device=device,
                                        non_blocking=True)
Example #6
valid_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=BATCH_SIZE,
                                           num_workers=4,
                                           collate_fn=collate_fn,
                                           sampler=valid_sampler)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=1,
                                          num_workers=4,
                                          shuffle=False)

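# generation and training settings passed through to train() and test() below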
config = {
    "epochs": 100,
    "device": get_device(),
    "sampling": True,
    "temperature": 1.0,
    "max_sentence_length": 18
}

embedding_dim = 256
hidden_dim = 512
vocab_size = len(vocab)
model = Baseline(embedding_dim, hidden_dim, vocab_size, vanilla=False)

criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=5e-4)

model.cuda()
train(model, optimizer, criterion, train_loader, valid_loader, vocab, config)
test(model, criterion, test_loader, vocab, config)
Example #7
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)
    char_vectors = util.torch_from_json(args.char_emb_file)
    # Get model
    log.info('Building model...')

    if (args.model == 'baseline'):
        model = Baseline(word_vectors=word_vectors,
                         hidden_size=args.hidden_size,
                         drop_prob=args.drop_prob)
        optimizer = optim.Adadelta(model.parameters(),
                                   args.lr,
                                   weight_decay=args.l2_wd)

    elif (args.model == 'bidaf'):
        model = BiDAF(word_vectors=word_vectors,
                      char_vectors=char_vectors,
                      char_emb_dim=args.char_emb_dim,
                      hidden_size=args.hidden_size,
                      drop_prob=args.drop_prob)
        optimizer = optim.Adadelta(model.parameters(),
                                   args.lr,
                                   weight_decay=args.l2_wd)

    elif (args.model == 'qanet'):
        model = QANet(word_vectors=word_vectors,
                      char_vectors=char_vectors,
                      char_emb_dim=args.char_emb_dim,
                      hidden_size=args.hidden_size,
                      n_conv_emb_enc=args.n_conv_emb,
                      n_conv_mod_enc=args.n_conv_mod,
                      drop_prob_word=0.1,
                      drop_prob_char=0.05,
                      kernel_size_emb_enc_block=7,
                      kernel_size_mod_enc_block=7,
                      n_heads=args.n_heads)
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               betas=(args.beta_1, args.beta_2),
                               eps=args.epsilon,
                               weight_decay=args.l2_wd)

    elif (args.model == 'qanet_out'):
        model = QANet(word_vectors=word_vectors,
                      char_vectors=char_vectors,
                      char_emb_dim=args.char_emb_dim,
                      hidden_size=args.hidden_size,
                      n_conv_emb_enc=args.n_conv_emb,
                      n_conv_mod_enc=args.n_conv_mod,
                      drop_prob_word=0.1,
                      drop_prob_char=0.05,
                      kernel_size_emb_enc_block=7,
                      kernel_size_mod_enc_block=7,
                      n_heads=args.n_heads)
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               betas=(args.beta_1, args.beta_2),
                               eps=args.epsilon,
                               weight_decay=args.l2_wd)

    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info(f'Loading checkpoint from {args.load_path}...')
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get scheduler
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
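    # step counts training examples seen so far, so the epoch counter resumes correctly after loading a checkpoint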
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        epoch += 1
        log.info(f'Starting epoch {epoch}...')
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                cc_idxs = cc_idxs.to(device)
                qc_idxs = qc_idxs.to(device)
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()

                # Forward
                log_p1, log_p2 = model(cw_idxs, cc_idxs, qw_idxs, qc_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(),
                                         args.max_grad_norm)
                optimizer.step()
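                # scheduler and EMA are stepped once per batch (step counts examples, hence the division by batch_size)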
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join(f'{k}: {v:05.2f}'
                                            for k, v in results.items())
                    log.info(f'Dev {results_str}')

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar(f'dev/{k}', v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
Example #8
def main():
    saver = utils.Saver(opt)

    # draw a random seed, then fix it everywhere for reproducibility
    opt.manualSeed = random.randint(1, 10000)
    random.seed(opt.manualSeed)
    torch.manual_seed(opt.manualSeed)
    torch.cuda.manual_seed_all(opt.manualSeed)

    # load data
    root = "data/modelnet40_ply_hdf5_2048/"  #"data/modelnet40_normal_resampled"#
    use_cuda = torch.cuda.is_available()

    transforms_list = []
    random_permute = utils.Random_permute(opt.num_points, delta=opt.distance)
    # load transformations
    if opt.random_input:
        print("random_input")
        transforms_list.append(random_permute)

    # Load dataset / data loader
    train_dataset = data.ModelNetDataset(
        root,
        train=True,
        sort=opt.sort,
        transform=transforms.Compose(transforms_list),
        distance=opt.distance,
        normal=opt.normal)
    train_loader = DataLoader(train_dataset,
                              batch_size=opt.batchSize,
                              shuffle=True,
                              num_workers=opt.workers)

    test_dataset = data.ModelNetDataset(root,
                                        train=False,
                                        sort=opt.sort,
                                        distance=opt.distance,
                                        normal=opt.normal)
    test_loader = DataLoader(test_dataset,
                             batch_size=opt.batchSize,
                             shuffle=False,
                             num_workers=opt.workers)

    # define model
    ndim = 6 if opt.distance or opt.normal else 3
    if opt.model == 'lstm':
        model = Baseline(input_dim=ndim, maxout=opt.elem_max)
    elif opt.model == 'lstm_mlp':
        model = LSTM_mlp(input_dim=ndim,
                         maxout=opt.elem_max,
                         mlp=[64, 128, 256, 512],
                         fc=[512, 256, 40])
    elif opt.model == 'test':
        model = Test(input_dim=ndim, maxout=opt.elem_max)

    # load the specified pre-trained model, if given
    if opt.path != '':
        model.load_state_dict(torch.load(opt.path))

    # define optimizer and loss function
    optimizer = optim.Adam(model.parameters(),
                           lr=opt.learning_rate,
                           weight_decay=1e-5)
    criterion = nn.CrossEntropyLoss()

    # move the model and criterion to CUDA if available
    if use_cuda:
        model = model.cuda()  # alternatively: nn.DataParallel(model).cuda()
        criterion = criterion.cuda()

    best_model_wts = model.state_dict()

    early_stopping = utils.Early_stopping(opt.early_stopping, patience=15)

    saver.log_parameters(model.parameters())

    for epoch in range(opt.nepoch):
        adjust_learning_rate(optimizer, epoch, saver)

        train(model, optimizer, criterion, saver, train_loader, epoch)

        test_loss = test(model, criterion, saver, test_loader, epoch)

        early_stopping.update(test_loss)
        if early_stopping.stop():
            break

    saver.save_result()
Example #9
class Trainer(BaseTrainer):
    def __init__(self, config):
        super(Trainer, self).__init__(config)
        self.datamanager = DataManger(config["data"])

        # model
        self.model = Baseline(
            num_classes=self.datamanager.datasource.get_num_classes("train")
        )

        # summary model
        summary(
            self.model,
            input_size=(3, 256, 128),
            batch_size=config["data"]["batch_size"],
            device="cpu",
        )

        # losses
        cfg_losses = config["losses"]
        self.criterion = Softmax_Triplet_loss(
            num_class=self.datamanager.datasource.get_num_classes("train"),
            margin=cfg_losses["margin"],
            epsilon=cfg_losses["epsilon"],
            use_gpu=self.use_gpu,
        )

        self.center_loss = CenterLoss(
            num_classes=self.datamanager.datasource.get_num_classes("train"),
            feature_dim=2048,
            use_gpu=self.use_gpu,
        )

        # optimizer
        cfg_optimizer = config["optimizer"]
        self.optimizer = torch.optim.Adam(
            self.model.parameters(),
            lr=cfg_optimizer["lr"],
            weight_decay=cfg_optimizer["weight_decay"],
        )

        self.optimizer_centerloss = torch.optim.SGD(
            self.center_loss.parameters(), lr=0.5
        )

        # learning rate scheduler
        cfg_lr_scheduler = config["lr_scheduler"]
        self.lr_scheduler = WarmupMultiStepLR(
            self.optimizer,
            milestones=cfg_lr_scheduler["steps"],
            gamma=cfg_lr_scheduler["gamma"],
            warmup_factor=cfg_lr_scheduler["factor"],
            warmup_iters=cfg_lr_scheduler["iters"],
            warmup_method=cfg_lr_scheduler["method"],
        )

        # track metric
        self.train_metrics = MetricTracker("loss", "accuracy")
        self.valid_metrics = MetricTracker("loss", "accuracy")

        # save best accuracy for function _save_checkpoint
        self.best_accuracy = None

        # send model to device
        self.model.to(self.device)

        self.scaler = GradScaler()

        # resume model from last checkpoint
        if config["resume"] != "":
            self._resume_checkpoint(config["resume"])

    def train(self):
        for epoch in range(self.start_epoch, self.epochs + 1):
            result = self._train_epoch(epoch)

            if self.lr_scheduler is not None:
                self.lr_scheduler.step()

            result = self._valid_epoch(epoch)

            # add scalars to tensorboard
            self.writer.add_scalars(
                "Loss",
                {
                    "Train": self.train_metrics.avg("loss"),
                    "Val": self.valid_metrics.avg("loss"),
                },
                global_step=epoch,
            )
            self.writer.add_scalars(
                "Accuracy",
                {
                    "Train": self.train_metrics.avg("accuracy"),
                    "Val": self.valid_metrics.avg("accuracy"),
                },
                global_step=epoch,
            )

            # logging result to console
            log = {"epoch": epoch}
            log.update(result)
            for key, value in log.items():
                self.logger.info("    {:15s}: {}".format(str(key), value))

            # save model
            if (
                self.best_accuracy is None
                or self.best_accuracy < self.valid_metrics.avg("accuracy")
            ):
                self.best_accuracy = self.valid_metrics.avg("accuracy")
                self._save_checkpoint(epoch, save_best=True)
            else:
                self._save_checkpoint(epoch, save_best=False)

            # save logs
            self._save_logs(epoch)

    def _train_epoch(self, epoch):
        """Training step"""
        self.model.train()
        self.train_metrics.reset()
        with tqdm(total=len(self.datamanager.get_dataloader("train"))) as epoch_pbar:
            epoch_pbar.set_description(f"Epoch {epoch}")
            for batch_idx, (data, labels, _) in enumerate(
                self.datamanager.get_dataloader("train")
            ):
                # push data to device
                data, labels = data.to(self.device), labels.to(self.device)

                # zero gradient
                self.optimizer.zero_grad()
                self.optimizer_centerloss.zero_grad()

                with autocast():
                    # forward batch
                    score, feat = self.model(data)

                    # calculate loss and accuracy
                    loss = (
                        self.criterion(score, feat, labels)
                        + self.center_loss(feat, labels) * self.config["losses"]["beta"]
                    )
                    _, preds = torch.max(score.data, dim=1)

                # backward parameters
                # loss.backward()
                self.scaler.scale(loss).backward()

                # backward parameters for center_loss
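                # the loss above scaled center_loss by beta; rescale by 1/beta so the center parameters see un-scaled gradients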
                for param in self.center_loss.parameters():
                    param.grad.data *= 1.0 / self.config["losses"]["beta"]

                # optimize
                # self.optimizer.step()
                self.scaler.step(self.optimizer)
                self.optimizer_centerloss.step()

                self.scaler.update()

                # update loss and accuracy in MetricTracker
                self.train_metrics.update("loss", loss.item())
                self.train_metrics.update(
                    "accuracy",
                    torch.sum(preds == labels.data).double().item() / data.size(0),
                )

                # update progress bar
                epoch_pbar.set_postfix(
                    {
                        "train_loss": self.train_metrics.avg("loss"),
                        "train_acc": self.train_metrics.avg("accuracy"),
                    }
                )
                epoch_pbar.update(1)
        return self.train_metrics.result()

    def _valid_epoch(self, epoch):
        """Validation step"""
        self.model.eval()
        self.valid_metrics.reset()
        with torch.no_grad():
            with tqdm(total=len(self.datamanager.get_dataloader("val"))) as epoch_pbar:
                epoch_pbar.set_description(f"Epoch {epoch}")
                for batch_idx, (data, labels, _) in enumerate(
                    self.datamanager.get_dataloader("val")
                ):
                    # push data to device
                    data, labels = data.to(self.device), labels.to(self.device)

                    with autocast():
                        # forward batch
                        score, feat = self.model(data)

                        # calculate loss and accuracy
                        loss = (
                            self.criterion(score, feat, labels)
                            + self.center_loss(feat, labels)
                            * self.config["losses"]["beta"]
                        )
                        _, preds = torch.max(score.data, dim=1)

                    # update loss and accuracy in MetricTracker
                    self.valid_metrics.update("loss", loss.item())
                    self.valid_metrics.update(
                        "accuracy",
                        torch.sum(preds == labels.data).double().item() / data.size(0),
                    )

                    # update progress bar
                    epoch_pbar.set_postfix(
                        {
                            "val_loss": self.valid_metrics.avg("loss"),
                            "val_acc": self.valid_metrics.avg("accuracy"),
                        }
                    )
                    epoch_pbar.update(1)
        return self.valid_metrics.result()

    def _save_checkpoint(self, epoch, save_best=True):
        """save model to file"""
        state = {
            "epoch": epoch,
            "state_dict": self.model.state_dict(),
            "center_loss": self.center_loss.state_dict(),
            "optimizer": self.optimizer.state_dict(),
            "optimizer_centerloss": self.optimizer_centerloss.state_dict(),
            "lr_scheduler": self.lr_scheduler.state_dict(),
            "best_accuracy": self.best_accuracy,
        }
        filename = os.path.join(self.checkpoint_dir, "model_last.pth")
        self.logger.info("Saving last model: model_last.pth ...")
        torch.save(state, filename)
        if save_best:
            filename = os.path.join(self.checkpoint_dir, "model_best.pth")
            self.logger.info("Saving current best: model_best.pth ...")
            torch.save(state, filename)

    def _resume_checkpoint(self, resume_path):
        """Load model from checkpoint"""
        if not os.path.exists(resume_path):
            raise FileNotFoundError("Resume path does not exist!")
        self.logger.info("Loading checkpoint: {} ...".format(resume_path))
        checkpoint = torch.load(resume_path, map_location=self.map_location)
        self.start_epoch = checkpoint["epoch"] + 1
        self.model.load_state_dict(checkpoint["state_dict"])
        self.center_loss.load_state_dict(checkpoint["center_loss"])
        self.optimizer.load_state_dict(checkpoint["optimizer"])
        self.optimizer_centerloss.load_state_dict(checkpoint["optimizer_centerloss"])
        self.lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
        self.best_accuracy = checkpoint["best_accuracy"]
        self.logger.info(
            "Checkpoint loaded. Resume training from epoch {}".format(self.start_epoch)
        )

    def _save_logs(self, epoch):
        """Save logs from google colab to google drive"""
        if os.path.isdir(self.logs_dir_saved):
            shutil.rmtree(self.logs_dir_saved)
        destination = shutil.copytree(self.logs_dir, self.logs_dir_saved)