def main_worker(local_rank, args):
    args.local_rank = local_rank
    # prepare dist environment
    dist.init_process_group(backend='nccl',
                            rank=args.local_rank,
                            world_size=args.world_size)
    torch.cuda.set_device(args.local_rank)

    # build the model and wrap it for multi-GPU training
    network = Xception(num_classes=cfg.num_classes)
    network = network.cuda()
    network = torch.nn.parallel.DistributedDataParallel(
        network, device_ids=[args.local_rank])

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(network.parameters(),
                          lr=cfg.lr_init,
                          momentum=cfg.SGD_momentum)

    dataloader_train = create_dataset_pytorch_imagenet_dist_train(
        data_path=args.data_path + 'train',
        local_rank=local_rank,
        n_workers=cfg.n_workers)
    dataloader_test = create_dataset_pytorch_imagenet(
        data_path=args.data_path + 'val',
        is_train=False,
        n_workers=cfg.n_workers)
    step_per_epoch = len(dataloader_train)

    # decay the learning rate every cfg.lr_decay_epoch epochs
    scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        gamma=cfg.lr_decay_rate,
        step_size=cfg.lr_decay_epoch * step_per_epoch)

    # only rank 0 writes TensorBoard summaries
    summary_writer = None
    if local_rank == 0:
        summary_writer = SummaryWriter(log_dir='./summary')

    trainer = Trainer(network=network,
                      criterion=criterion,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      dataloader_train=dataloader_train,
                      dataloader_test=dataloader_test,
                      summary_writer=summary_writer,
                      epoch_size=cfg.epoch_size,
                      ckpt_path=args.ckpt_path,
                      local_rank=local_rank)
    for epoch_id in range(cfg.epoch_size):
        trainer.step()

    if local_rank == 0:
        summary_writer.close()
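# --- Launcher sketch (not in the original source): main_worker's
# (local_rank, args) signature matches the calling convention of
# torch.multiprocessing.spawn, so a per-GPU launcher could look like the
# hypothetical function below. MASTER_ADDR/MASTER_PORT are assumptions
# needed by the default env:// rendezvous of dist.init_process_group.
import os
import torch.multiprocessing as mp

def launch_distributed(args):
    os.environ.setdefault('MASTER_ADDR', '127.0.0.1')
    os.environ.setdefault('MASTER_PORT', '29500')
    args.world_size = torch.cuda.device_count()
    # start one worker process per GPU; each receives its local_rank as arg 0
    mp.spawn(main_worker, nprocs=args.world_size, args=(args,))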
def main():
    # parser = argparse.ArgumentParser(description='Deep Learning Framework 1 Argument')
    # parser.add_argument('--epochs', type=int, default=100, metavar='N',
    #                     help='number of epochs to train and test the model (default=100)')
    # args = parser.parse_args()

    # single-channel input: convert to grayscale, then normalize with dataset statistics
    transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor(),
        transforms.Normalize([0.4161, ], [0.1688, ]),
    ])
    train_dataset = datasets.ImageFolder('cropped_trainset', transform)
    test_dataset = datasets.ImageFolder('cropped_testset', transform)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=3,
                                               shuffle=True, num_workers=2)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1,
                                              shuffle=False, num_workers=2)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Xception().to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = StepLR(optimizer, step_size=1, gamma=0.8)

    # set your own number of epochs; use the train and test functions
    # to train and test your model
    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)
        scheduler.step()  # decay the learning rate once per epoch

    save_models(model)
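# --- Sketch of the train/test helpers that main() above calls (assumed,
# not the repository's actual implementations; signatures are inferred
# from the call sites, and the loss/logging details are illustrative).
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    criterion = nn.CrossEntropyLoss()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        loss = criterion(model(data), target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print(f'epoch {epoch} batch {batch_idx} loss {loss.item():.4f}')

def test(model, device, test_loader):
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            pred = model(data).argmax(dim=1)
            correct += (pred == target).sum().item()
    print(f'test accuracy: {correct / len(test_loader.dataset):.4f}')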
                        default=0, help='device id of GPU. (Default: 0)')
    args = parser.parse_args()
    args.local_rank = 0
    args.world_size = 1

    network = Xception(num_classes=cfg.num_classes)
    # network = nn.DataParallel(network)
    network = network.cuda()

    criterion = nn.CrossEntropyLoss()
    # optimizer = optim.RMSprop(network.parameters(),
    #                           lr=cfg.lr_init,
    #                           eps=cfg.rmsprop_epsilon,
    #                           momentum=cfg.rmsprop_momentum,
    #                           alpha=cfg.rmsprop_decay)
    optimizer = optim.SGD(network.parameters(),
                          lr=cfg.lr_init,
                          momentum=cfg.SGD_momentum)

    # prepare data with a DALI pipeline
    # dataloader = create_dataset_pytorch(args.data_path + "/train")
    pipe = HybridTrainPipe(batch_size=cfg.batch_size,
                           num_threads=cfg.n_workers,
                           device_id=args.local_rank,
                           data_dir=args.data_path,
                           crop=cfg.image_size,
                           local_rank=args.local_rank,
                           world_size=args.world_size)
    pipe.build()
    dataloader = DALIClassificationIterator(pipe, reader_name="Reader")
    # integer division: the original `/` yields a float in Python 3
    step_per_epoch = dataloader.size // cfg.batch_size
    print("step_per_epoch =", step_per_epoch)
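# --- Consumption sketch (assumed, not the original file's code): per step, a
# DALIClassificationIterator yields a list with one dict per GPU, holding
# "data" and "label" tensors. A training loop over the pipeline above could
# look roughly like this; exact field handling varies across DALI versions.
def run_epoch(network, criterion, optimizer, dataloader):
    network.train()
    for batch in dataloader:
        images = batch[0]["data"]  # already on GPU when decoded with device="mixed"
        labels = batch[0]["label"].squeeze(-1).long().cuda(non_blocking=True)
        optimizer.zero_grad()
        loss = criterion(network(images), labels)
        loss.backward()
        optimizer.step()
    dataloader.reset()  # DALI iterators must be reset between epochs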