Example #1
def validation(validation_data, model, global_step, t_vocab_size, val_writer,
               opt):
    model.eval()
    total_loss = 0.0
    total_cnt = 0
    for batch in validation_data:
        inputs, i_mask = None, None
        if opt.has_inputs:
            inputs = batch.src
            i_mask = utils.create_pad_mask(inputs, opt.src_pad_idx)
        targets = batch.trg
        t_mask = utils.create_pad_mask(targets, opt.trg_pad_idx)
        t_self_mask = utils.create_trg_self_mask(targets)

        # Forward pass and loss computation without tracking gradients.
        with torch.no_grad():
            pred = model(inputs, targets, i_mask, t_self_mask, t_mask)

            pred = pred.view(-1, pred.size(-1))
            ans = targets.view(-1)
            loss = utils.get_loss(pred, ans, t_vocab_size, 0,
                                  opt.trg_pad_idx)
        # Accumulate the loss weighted by batch size so the final average is per example.
        total_loss += loss.item() * len(batch)
        total_cnt += len(batch)

    val_loss = total_loss / total_cnt
    print("Validation Loss", val_loss)
    val_writer.add_scalar('loss', val_loss, global_step)
    return val_loss
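
The loop above relies on two masking helpers from the project's utils module that are not shown here. A minimal sketch of the usual padding and causal masks, assuming boolean masks that are True at positions attention should ignore, could look like this:

import torch

def create_pad_mask(tokens, pad_idx):
    # True where the token is padding; the extra dim lets the mask broadcast
    # over the query positions of the attention scores.
    return (tokens == pad_idx).unsqueeze(-2)

def create_trg_self_mask(targets):
    # Causal mask: True strictly above the diagonal, i.e. future positions
    # that a target token must not attend to.
    seq_len = targets.size(1)
    ones = torch.ones(seq_len, seq_len, device=targets.device)
    return torch.triu(ones, diagonal=1).bool().unsqueeze(0)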
Example #2
def train(train_data, model, opt, global_step, optimizer, t_vocab_size,
          label_smoothing, writer):
    model.train()
    last_time = time.time()
    pbar = tqdm(total=len(train_data.dataset), ascii=True)
    for batch in train_data:
        inputs = None
        if opt.has_inputs:
            inputs = batch.src

        targets = batch.trg
        pred = model(inputs, targets)

        pred = pred.view(-1, pred.size(-1))
        ans = targets.view(-1)

        loss = utils.get_loss(pred, ans, t_vocab_size, label_smoothing,
                              opt.trg_pad_idx)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Log training summaries every 100 steps.
        if global_step % 100 == 0:
            summarize_train(writer, global_step, last_time, model, opt, inputs,
                            targets, optimizer, loss, pred, ans)
            last_time = time.time()

        pbar.set_description('[Loss: {:.4f}]'.format(loss.item()))

        global_step += 1
        pbar.update(targets.size(0))

    pbar.close()
    train_data.reload_examples()
    return global_step
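
Both the validation and training loops delegate the loss to utils.get_loss(pred, ans, t_vocab_size, label_smoothing, trg_pad_idx), whose body is not shown. A plausible minimal sketch, assuming pred holds raw logits of shape (tokens, vocab_size) and that the smoothing mass is spread uniformly over the non-target classes, is:

import torch
import torch.nn.functional as F

def get_loss(pred, ans, vocab_size, label_smoothing, pad_idx):
    # Smoothed target distribution: the true class gets (1 - label_smoothing)
    # and the remaining mass is shared by the other classes.
    confidence = 1.0 - label_smoothing
    smoothing_value = label_smoothing / (vocab_size - 1)
    one_hot = torch.full_like(pred, smoothing_value)
    one_hot.scatter_(1, ans.unsqueeze(1), confidence)

    log_prob = F.log_softmax(pred, dim=1)
    loss = -(one_hot * log_prob).sum(dim=1)

    # Ignore padding positions and average over real tokens.
    non_pad = ans != pad_idx
    return loss.masked_select(non_pad).mean()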
Example #3
def run(cfg):
    '''Load save path'''
    cfg.log_string('Data save path: %s' % (cfg.save_path))
    checkpoint = CheckpointIO(cfg)
    '''Load device'''
    cfg.log_string('Loading device settings.')
    device = load_device(cfg)
    '''Load data'''
    cfg.log_string('Loading dataset.')
    train_loader = get_dataloader(cfg.config, mode='train')
    test_loader = get_dataloader(cfg.config, mode='test')
    '''Load net'''
    cfg.log_string('Loading model.')
    net = get_model(cfg.config, device=device)
    if isinstance(net, list):
        checkpoint.register_modules(voxnet=net[0])
        checkpoint.register_modules(refnet=net[1])
    else:
        checkpoint.register_modules(voxnet=net)

    '''Load loss'''
    cfg.log_string('Loading loss function.')
    loss_func = get_loss(cfg.config, device)
    '''Load optimizer'''
    cfg.log_string('Loading optimizer.')
    optimizer = get_optimizer(config=cfg.config, net=net)
    if isinstance(net, list):
        checkpoint.register_modules(voxopt=optimizer[0])
        checkpoint.register_modules(refopt=optimizer[1])
    else:
        checkpoint.register_modules(voxopt=optimizer)
    '''Load scheduler'''
    cfg.log_string('Loading optimizer scheduler.')
    scheduler = load_scheduler(config=cfg.config, optimizer=optimizer)
    if isinstance(net, list):
        checkpoint.register_modules(voxsch=scheduler[0])
        checkpoint.register_modules(refsch=scheduler[1])
    else:
        checkpoint.register_modules(voxsch=scheduler)
    '''Load trainer'''
    cfg.log_string('Loading trainer.')
    trainer = get_trainer(cfg.config)
    '''Start to train'''
    cfg.log_string('Start to train.')
    #cfg.log_string('Total number of parameters in {0:s}: {1:d}.'.format(cfg.config['method'], sum(p.numel() for p in net.parameters())))

    trainer(cfg,
            net,
            loss_func,
            optimizer,
            scheduler,
            train_loader=train_loader,
            test_loader=test_loader,
            device=device,
            checkpoint=checkpoint)

    cfg.log_string('Training finished.')
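
This example wires everything together through config-driven factories (get_model, get_loss, get_optimizer, load_scheduler) defined elsewhere in the repository. As an illustration only, with hypothetical config keys, a registry-style get_loss might look like:

import torch.nn as nn

def get_loss(config, device):
    # Hypothetical config layout: config['loss']['method'] names the criterion.
    name = config['loss']['method']
    losses = {
        'cross_entropy': nn.CrossEntropyLoss(),
        'l1': nn.L1Loss(),
        'mse': nn.MSELoss(),
    }
    if name not in losses:
        raise ValueError('Unknown loss: {}'.format(name))
    return losses[name].to(device)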
Example #4
def train(model, dataloader, device, optimizer_name, loss_name, lr):
    optimizer_object = get_optimizer(optimizer_name)
    optimizer = optimizer_object(model.parameters(), lr=lr)

    loss_fn = get_loss(loss_name)

    model.train()

    running_loss = 0.0
    running_corrects = 0

    for inputs, targets in dataloader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        bs = len(targets)
        classes = torch.zeros((bs, 10))
        for i in range(bs):
            classes[i][targets[i]] = 1
        classes = classes.to(device)

        outputs = model(inputs)
        loss = loss_fn()(outputs,
                         classes)  # LeCun et al. used maximum log-likelihood

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        _, preds = torch.max(outputs.data, 1)
        # statistics: weight the batch loss by the batch size so the final
        # figure is a per-example average, and derive the dataset size from
        # the dataloader instead of hard-coding 60000 (the MNIST train size)
        running_loss += loss.item() * bs
        running_corrects += torch.sum(preds == targets.data)

    n_samples = len(dataloader.dataset)
    loss = running_loss / n_samples
    acc = running_corrects.data.item() / n_samples
    print('Training results: Loss: {:.4f} Acc: {:.4f}'.format(loss, acc))

    return acc
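
Here get_optimizer(optimizer_name) and get_loss(loss_name) appear to return classes rather than instances (note the loss_fn()(outputs, classes) call). A minimal sketch of such name-based lookups, assuming a plain string-to-class mapping, could be:

import torch.nn as nn
import torch.optim as optim

def get_optimizer(optimizer_name):
    # Return the optimizer class; the caller instantiates it with
    # model.parameters() and a learning rate.
    return {'sgd': optim.SGD, 'adam': optim.Adam}[optimizer_name.lower()]

def get_loss(loss_name):
    # Return the loss class; the caller instantiates it before use.
    return {'mse': nn.MSELoss, 'cross_entropy': nn.CrossEntropyLoss}[loss_name.lower()]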
Example #5
def train(model, dataloader, device, optimizer_name, loss_name, lr, verbose):
    optimizer_object = get_optimizer(optimizer_name)
    optimizer = optimizer_object(model.parameters(), lr=lr)

    loss_fn = get_loss(loss_name)

    model.train()

    running_loss = 0.0
    running_corrects = 0

    for inputs, targets in dataloader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        bs = len(targets)
        classes = torch.zeros((bs, 10))
        for i in range(bs):
            classes[i][targets[i]] = 1
        classes = classes.to(device)

        outputs = model(inputs)
        loss = loss_fn()(outputs, classes)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        _, preds = torch.max(outputs.data, 1)

        # Weight the batch loss by the batch size and derive the dataset size
        # from the dataloader rather than hard-coding 60000.
        running_loss += loss.item() * bs
        running_corrects += torch.sum(preds == targets.data)

    n_samples = len(dataloader.dataset)
    loss = running_loss / n_samples
    acc = running_corrects.data.item() / n_samples
    if verbose:
        print(f'Training results: Loss: {loss:.4f} Acc: {acc:.4f}')

    return acc
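
The explicit loop that builds the one-hot target matrix in Examples #4 and #5 can be replaced with a single vectorized call; a sketch using torch.nn.functional.one_hot, assuming targets is a tensor of integer class indices, is:

import torch.nn.functional as F

# Vectorized replacement for the explicit loop; the float cast matches the
# manually built matrix expected by an MSE-style criterion.
classes = F.one_hot(targets, num_classes=10).float().to(device)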
Example #6
def train(args):
    # Get hardware device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Check if weights and biases integration is enabled.
    if args.wandb == 1:
        import wandb
        wandb.init(entity='surajpai',
                   project='FacialEmotionRecognition',
                   config=vars(args))

    # Get the dataset with "Training" usage.
    dataset = FER2013Dataset(args.data_path, "Training")

    # Randomly split the dataset into train and validation based on the specified train_split argument
    train_dataset, validation_dataset = torch.utils.data.random_split(
        dataset, [
            int(len(dataset) * args.train_split),
            len(dataset) - int(len(dataset) * args.train_split)
        ])

    logging.info(
        'Samples in the training set: {}\n Samples in the validation set: {} \n\n'
        .format(len(train_dataset), len(validation_dataset)))

    # Get class weights as inverse of frequencies from class occurences in the dataset.
    dataset_summary = dataset.get_summary_statistics()
    class_weights = (1 / dataset_summary["class_occurences"])
    class_weights = torch.Tensor(class_weights /
                                 np.sum(class_weights)).to(device)

    # Train loader and validation loader initialized with batch_size as specified and randomly shuffled
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              pin_memory=True)
    val_loader = DataLoader(validation_dataset,
                            batch_size=args.batch_size,
                            shuffle=True,
                            pin_memory=True)

    # Model initialization
    model = torch.nn.DataParallel(Model(args.model_config)).to(device)

    # Set torch optimizer
    optimizer = torch.optim.Adam(model.parameters())

    # Get loss for training the network from the utils get_loss function
    criterion = get_loss(args, class_weights)
    bestLoss = float('inf')  # lower is better; min() below tracks the best validation loss

    # Create metric logger object
    metrics = Metrics(upload=args.wandb)

    # Define augmentation transforms, if --augment is enabled
    if args.augment == 1:
        transform = transforms.RandomChoice([
            transforms.RandomHorizontalFlip(p=0.75),
            transforms.RandomAffine(15,
                                    translate=(0.1, 0.1),
                                    scale=(1.2, 1.2),
                                    shear=15),
            transforms.ColorJitter()
        ])

    # Start iterating over the total number of epochs set by epochs argument
    for n_epoch in range(args.epochs):

        # Reset running metrics at the beginning of each epoch.
        metrics.reset()

        # Utils logger
        logging.info(' Starting Epoch: {}/{} \n'.format(n_epoch, args.epochs))
        '''

        TRAINING

        '''

        # Model in train mode for batch-norm and dropout related ops.
        model.train()

        # Iterate over each batch in the train loader
        for idx, batch in enumerate(tqdm(train_loader)):

            # Reset gradients
            optimizer.zero_grad()

            # Apply augmentation transforms, if --augment is enabled
            if args.augment == 1 and n_epoch % 2 == 0:
                batch = apply_transforms(batch, transform)

            # Move the batch to the device, needed explicitly if GPU is present
            image, target = batch["image"].to(device), batch["emotion"].to(
                device)

            # Run a forward pass over images from the batch
            out = model(image)

            # Calculate loss based on the criterion set
            loss = criterion(out, target)

            # Backward pass from the final loss
            loss.backward()

            # Update the optimizer
            optimizer.step()

            # Update metrics for this batch
            metrics.update_train({
                "loss": loss.item(),
                "predicted": out,
                "ground_truth": target
            })
        '''

        VALIDATION

        '''

        logging.info(' Validating on the validation split ... \n \n')

        # Model in eval mode.
        model.eval()

        # Set no grad to disable gradient saving.
        with torch.no_grad():

            # Iterate over each batch in the val loader
            for idx, batch in enumerate(val_loader):

                # Move the batch to the device, needed explicitly if GPU is present
                image, target = batch["image"].to(device), batch["emotion"].to(
                    device)

                # Forward pass
                out = model(image)

                # Calculate loss based on the criterion set
                loss = criterion(out, target)

                # Metrics and sample predictions updated for validation batch
                metrics.update_val({
                    "loss": loss.item(),
                    "predicted": out,
                    "ground_truth": target,
                    "image": image,
                    "class_mapping": dataset.get_class_mapping()
                })

        # Display metrics at the end of each epoch
        metrics.display()

        # Weight Checkpointing to save the best model on validation loss
        save_path = "./saved_models/{}.pth.tar".format(
            args.model_config.split('/')[-1].split('.')[0])
        bestLoss = min(bestLoss, metrics.metric_dict["loss@val"])
        is_best = (bestLoss == metrics.metric_dict["loss@val"])
        save_checkpoint(
            {
                'epoch': n_epoch,
                'state_dict': model.state_dict(),
                'bestLoss': bestLoss,
                'optimizer': optimizer.state_dict(),
            }, is_best, save_path)

    # After training is completed, if weights and biases is enabled, visualize filters and upload final model.
    if args.wandb == 1:
        visualize_filters(model.modules())
        wandb.save(save_path)

    # Get report from the metrics logger
    train_report, val_report = metrics.get_report()

    # Save the report to csv files
    # str.rstrip strips a set of characters, not a suffix, so remove the
    # extension explicitly before building the report file names.
    report_prefix = save_path[:-len(".pth.tar")]
    train_report.to_csv("{}_trainreport.csv".format(report_prefix))
    val_report.to_csv("{}_valreport.csv".format(report_prefix))
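
The criterion in this example comes from a project-specific get_loss(args, class_weights) whose definition is not shown. A plausible minimal version, assuming a hypothetical args.loss string and that the weights are meant for a weighted cross entropy over the imbalanced classes, might be:

import torch.nn as nn

def get_loss(args, class_weights):
    # Hypothetical dispatch on an args.loss string; the weights counteract the
    # class imbalance measured on the training split.
    losses = {
        'cross_entropy': nn.CrossEntropyLoss(weight=class_weights),
        'nll': nn.NLLLoss(weight=class_weights),
    }
    return losses.get(args.loss, nn.CrossEntropyLoss(weight=class_weights))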