def train_main():
    """Train the simple CNN, logging to TensorBoard, then save its weights."""
    net = Net().to(device)
    sgd = optim.SGD(net.parameters(), lr=0.05)
    loss_fn = nn.BCELoss()
    print(net)

    bs = 25
    tr_loader = get_train_loader(bs)
    val_loader = get_validation_loader(bs)
    tb_log = get_tensorboard('simple')

    n_epochs = 50
    t0 = datetime.now()
    for ep in range(1, n_epochs + 1):
        train(net, tr_loader, loss_fn, sgd, ep, tb_log)
        # Gradients are not needed for the validation pass.
        with torch.no_grad():
            print('\nValidation:')
            evaluate(net, val_loader, loss_fn, ep, tb_log)
    t1 = datetime.now()
    print('Total training time: {}.'.format(t1 - t0))

    torch.save(net.state_dict(), model_file)
    print('Wrote model to', model_file)
def train_main():
    """Train the simple CNN with optional multi-GPU DataParallel support.

    Wraps the model in ``nn.DataParallel`` when more than one GPU is
    visible, scales the batch size with the device count, and saves the
    trained (unwrapped) weights to ``model_file``.
    """
    model = Net()

    num_gpus = torch.cuda.device_count()
    if num_gpus > 1:
        print('Using multi-gpu with {} GPUs!'.format(num_gpus))
        model = nn.DataParallel(model)
    model.to(device)

    optimizer = optim.SGD(model.parameters(), lr=0.05)
    criterion = nn.BCELoss()
    print(model)

    # BUG FIX: on a CPU-only machine torch.cuda.device_count() is 0, and
    # the original computed batch_size = 0, which breaks the DataLoader.
    # Scale by at least one device.
    batch_size = 25 * max(1, num_gpus)
    train_loader = get_train_loader(batch_size)
    validation_loader = get_validation_loader(batch_size)

    log = get_tensorboard('simple_multigpu')
    epochs = 50

    start_time = datetime.now()
    for epoch in range(1, epochs + 1):
        train(model, train_loader, criterion, optimizer, epoch, log)
        # No gradient bookkeeping needed during validation.
        with torch.no_grad():
            print('\nValidation:')
            evaluate(model, validation_loader, criterion, epoch, log)
    end_time = datetime.now()
    print('Total training time: {}.'.format(end_time - start_time))

    # BUG FIX: model.module exists only when the model was wrapped in
    # DataParallel; the original crashed with AttributeError on the
    # single-GPU/CPU path.  Unwrap conditionally before saving.
    if isinstance(model, nn.DataParallel):
        state = model.module.state_dict()
    else:
        state = model.state_dict()
    torch.save(state, model_file)
    print('Wrote model to', model_file)
def main():
    """Fit the Lightning model with a multi-GPU DDP trainer, then test it."""
    net = Net()

    bs = 25
    tr_loader = get_train_loader(bs)
    val_loader = get_validation_loader(bs)

    # Use every visible GPU with distributed data parallel.
    trainer = pl.Trainer(gpus=-1, max_epochs=50, accelerator='ddp')
    # Alternative backend:
    # trainer = pl.Trainer(gpus=1, max_epochs=50, accelerator='horovod', checkpoint_callback=False)

    t0 = datetime.now()
    trainer.fit(net, tr_loader, val_loader)
    print('Total training time: {}.'.format(datetime.now() - t0))

    # Final evaluation on the held-out test split.
    trainer.test(test_dataloaders=get_test_loader(bs))
def train_main():
    """Train the simple CNN across Horovod workers and save the weights.

    Assumes ``hvd.init()`` has been called and ``device`` has been pinned
    to this worker's local rank before entry (done elsewhere in the file
    — TODO confirm).
    """
    model = Net().to(device)
    print(model)

    # Horovod: start every worker from rank 0's initial weights so all
    # workers optimize the same model.
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)

    # Horovod: scale the learning rate by the worker count, since the
    # effective global batch size grows with hvd.size().
    lr = 0.05
    optimizer = optim.SGD(model.parameters(), lr=lr * hvd.size())

    # Horovod: wrap the optimizer so gradients are averaged across workers.
    optimizer = hvd.DistributedOptimizer(
        optimizer, named_parameters=model.named_parameters())

    criterion = nn.BCELoss()

    batch_size = 25
    train_loader = get_train_loader(batch_size)
    validation_loader = get_validation_loader(batch_size)

    log = get_tensorboard('simple')
    epochs = 50

    start_time = datetime.now()
    for epoch in range(1, epochs + 1):
        train(model, train_loader, criterion, optimizer, epoch, log)
        # Validation needs no gradient tracking.
        with torch.no_grad():
            print('\nValidation:')
            evaluate(model, validation_loader, criterion, epoch, log)
    end_time = datetime.now()
    print('Total training time: {}.'.format(end_time - start_time))

    # BUG FIX: only rank 0 writes the checkpoint.  Previously every
    # worker wrote model_file concurrently, risking a corrupted file.
    if hvd.rank() == 0:
        torch.save(model.state_dict(), model_file)
        print('Wrote model to', model_file)
def train_main():
    """Two-stage transfer learning: train the new layers, then fine-tune.

    Stage 1 trains only the freshly added classifier layers of the
    pretrained network (everything else is frozen).  Stage 2 additionally
    unfreezes the top VGG convolution blocks (children of
    ``model.vgg_features`` named >= 24) and fine-tunes with a small
    RMSprop learning rate.  Stage-1 weights go to ``model_file``,
    fine-tuned weights to ``model_file_ft``.
    """
    # --- Learning 1: train the new layers only --------------------------
    model = PretrainedNet().to(device)
    # Only parameters left trainable by PretrainedNet (the new layers).
    params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.SGD(params, lr=0.01)
    criterion = nn.BCELoss()
    print(model)

    batch_size = 25
    train_loader = get_train_loader(batch_size)
    validation_loader = get_validation_loader(batch_size)

    log = get_tensorboard('pretrained')
    epochs = 10

    start_time = datetime.now()
    for epoch in range(1, epochs + 1):
        train(model, train_loader, criterion, optimizer, epoch, log)
        with torch.no_grad():
            print('\nValidation:')
            evaluate(model, validation_loader, criterion, epoch, log)
    end_time = datetime.now()
    print('Total training time: {}.'.format(end_time - start_time))

    torch.save(model.state_dict(), model_file)
    print('Wrote model to', model_file)

    # --- Learning 2: fine-tune the top convolution blocks ---------------
    log = get_tensorboard('finetuned')

    # Unfreeze children named >= 24; print one status line per child:
    # ' ' = no parameters, '-' = frozen, '+' = unfrozen.
    for name, layer in model.vgg_features.named_children():
        # BUG FIX: collect this layer's parameters explicitly.  The
        # original printed `len(param)` after the inner loop, so for
        # parameter-less layers (ReLU, MaxPool) it showed a stale value
        # left over from the previous layer's last parameter.
        layer_params = list(layer.parameters())
        note = ' '
        if layer_params:
            note = '-'
            if int(name) >= 24:
                for p in layer_params:
                    p.requires_grad = True
                note = '+'
        print(name, note, layer, len(layer_params))

    params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.RMSprop(params, lr=1e-5)
    criterion = nn.BCELoss()
    print(model)

    # Continue the TensorBoard epoch axis after the stage-1 epochs.
    # (Originally `prev_epochs = epoch`, relying on loop-variable leakage;
    # `epochs` has the same value and cannot be unbound.)
    prev_epochs = epochs
    epochs = 20

    start_time = datetime.now()
    for epoch in range(1, epochs + 1):
        train(model, train_loader, criterion, optimizer,
              prev_epochs + epoch, log)
        with torch.no_grad():
            print('\nValidation:')
            evaluate(model, validation_loader, criterion,
                     prev_epochs + epoch, log)
    end_time = datetime.now()
    print('Total training time: {}.'.format(end_time - start_time))

    torch.save(model.state_dict(), model_file_ft)
    print('Wrote finetuned model to', model_file_ft)