# Example #1
def main_worker(local_rank, args):
    """Run one distributed-training process on the GPU given by *local_rank*.

    Joins the NCCL process group, builds the DistributedDataParallel
    Xception model with its SGD optimizer and step-decay LR schedule,
    then drives ``cfg.epoch_size`` epochs through a ``Trainer``.
    Only rank 0 writes TensorBoard summaries.
    """
    args.local_rank = local_rank

    # Join the process group and pin this process to its own GPU.
    dist.init_process_group(backend='nccl',
                            rank=args.local_rank,
                            world_size=args.world_size)
    torch.cuda.set_device(args.local_rank)

    # Model on this rank's GPU, wrapped for distributed data parallelism.
    model = Xception(num_classes=cfg.num_classes).cuda()
    model = torch.nn.parallel.DistributedDataParallel(
        model, device_ids=[args.local_rank])

    loss_fn = nn.CrossEntropyLoss()
    sgd = optim.SGD(model.parameters(),
                    lr=cfg.lr_init,
                    momentum=cfg.SGD_momentum)

    # assumes args.data_path ends with a path separator — TODO confirm
    train_loader = create_dataset_pytorch_imagenet_dist_train(
        data_path=args.data_path + 'train',
        local_rank=local_rank,
        n_workers=cfg.n_workers)
    test_loader = create_dataset_pytorch_imagenet(
        data_path=args.data_path + 'val',
        is_train=False,
        n_workers=cfg.n_workers)

    # StepLR counts optimizer steps, so the per-epoch decay interval is
    # expressed in steps: epochs-per-decay times steps-per-epoch.
    steps_per_epoch = len(train_loader)
    lr_sched = optim.lr_scheduler.StepLR(
        sgd,
        gamma=cfg.lr_decay_rate,
        step_size=cfg.lr_decay_epoch * steps_per_epoch)

    # TensorBoard output only on the rank-0 process.
    writer = SummaryWriter(log_dir='./summary') if local_rank == 0 else None

    trainer = Trainer(network=model,
                      criterion=loss_fn,
                      optimizer=sgd,
                      scheduler=lr_sched,
                      dataloader_train=train_loader,
                      dataloader_test=test_loader,
                      summary_writer=writer,
                      epoch_size=cfg.epoch_size,
                      ckpt_path=args.ckpt_path,
                      local_rank=local_rank)

    # One Trainer.step() per epoch; the index itself is unused.
    for _ in range(cfg.epoch_size):
        trainer.step()

    if local_rank == 0:
        writer.close()
# Example #2
def main():
    """Train and evaluate an Xception model on the cropped grayscale datasets.

    Builds ImageFolder loaders for ``cropped_trainset`` / ``cropped_testset``,
    trains for 100 epochs with Adam plus a per-epoch StepLR decay, and saves
    the final model via ``save_models``.
    """
    # Single-channel input normalized with the dataset's mean/std.
    transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor(),
        transforms.Normalize([0.4161], [0.1688]),
    ])

    train_dataset = datasets.ImageFolder('cropped_trainset', transform)
    test_dataset = datasets.ImageFolder('cropped_testset', transform)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=3,
                                               shuffle=True,
                                               num_workers=2)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=2)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = Xception().to(device)

    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = StepLR(optimizer, step_size=1, gamma=0.8)

    # Fixed 100-epoch run; evaluate after every epoch.
    for epoch in range(100):
        train(model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)
        # Bug fix: the scheduler was created but never stepped, so the
        # configured 0.8 per-epoch LR decay never took effect.
        scheduler.step()

    save_models(model)
# Example #3
                        default=0,
                        help='device id of GPU. (Default: 0)')
    args = parser.parse_args()
    args.local_rank = 0
    args.world_size = 1

    network = Xception(num_classes=cfg.num_classes)
    # network = nn.DataParallel(network)
    network = network.cuda()
    criterion = nn.CrossEntropyLoss()
    #     optimizer = optim.RMSprop(network.parameters(),
    #                                 lr=cfg.lr_init,
    #                                 eps=cfg.rmsprop_epsilon,
    #                                 momentum=cfg.rmsprop_momentum,
    #                                 alpha=cfg.rmsprop_decay)
    optimizer = optim.SGD(network.parameters(),
                          lr=cfg.lr_init,
                          momentum=cfg.SGD_momentum)
    # prepare data
    # dataloader = create_dataset_pytorch(args.data_path + "/train")
    pipe = HybridTrainPipe(batch_size=cfg.batch_size,
                           num_threads=cfg.n_workers,
                           device_id=args.local_rank,
                           data_dir=args.data_path,
                           crop=cfg.image_size,
                           local_rank=args.local_rank,
                           world_size=args.world_size)
    pipe.build()
    dataloader = DALIClassificationIterator(pipe, reader_name="Reader")
    step_per_epoch = dataloader.size / cfg.batch_size
    print("step_per_epoch =", step_per_epoch)