def main_worker(gpu, ngpus_per_node, args):
    args.gpu = gpu
    ngpus_per_node = torch.cuda.device_count()
    print("Use GPU: {} for training".format(args.gpu))

    # global rank = node rank * GPUs per node + local GPU index
    args.rank = args.rank * ngpus_per_node + gpu
    dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                            world_size=args.world_size, rank=args.rank)

    print('==> Making model..')
    net = pyramidnet()
    torch.cuda.set_device(args.gpu)
    net.cuda(args.gpu)

    # split the global batch and the data-loading workers across the GPUs
    args.batch_size = int(args.batch_size / ngpus_per_node)
    args.num_workers = int(args.num_workers / ngpus_per_node)
    net = torch.nn.parallel.DistributedDataParallel(net, device_ids=[args.gpu])

    num_params = sum(p.numel() for p in net.parameters() if p.requires_grad)
    print('The number of parameters of model is', num_params)

    print('==> Preparing data..')
    transforms_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    ])

    dataset_train = CIFAR10(root='../data', train=True, download=True,
                            transform=transforms_train)
    train_sampler = torch.utils.data.distributed.DistributedSampler(dataset_train)
    train_loader = DataLoader(dataset_train, batch_size=args.batch_size,
                              shuffle=(train_sampler is None),
                              num_workers=args.num_workers, sampler=train_sampler)

    # there are 10 classes so the dataset name is cifar-10
    classes = ('plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck')

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=0.9, weight_decay=1e-4)

    train(net, criterion, optimizer, train_loader, args.gpu)
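# main_worker above is written to be spawned once per GPU. A minimal launcher
# sketch, assuming the standard torch.multiprocessing pattern and that
# args.world_size initially holds the number of nodes (as the rank arithmetic
# above implies); this launcher is not part of the source snippet:
import torch.multiprocessing as mp

def main():
    args = parser.parse_args()
    ngpus_per_node = torch.cuda.device_count()
    # total processes = processes per node * number of nodes
    args.world_size = ngpus_per_node * args.world_size
    # spawn one worker per GPU; each receives (gpu, ngpus_per_node, args)
    mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))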
def main_worker(gpu_id, ngpus, root, args):
    # single node: the local GPU id doubles as the process rank
    batch_size = int(args.batch_size / ngpus)
    num_workers = int(args.num_workers / ngpus)
    Tools.print("Use GPU: {} for training".format(gpu_id))

    dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                            world_size=ngpus, rank=gpu_id)

    Tools.print('==> Making model..')
    net = pyramidnet()
    torch.cuda.set_device(gpu_id)
    net.cuda(gpu_id)
    net = torch.nn.parallel.DistributedDataParallel(net, device_ids=[gpu_id])

    num_params = sum(p.numel() for p in net.parameters() if p.requires_grad)
    Tools.print('The number of parameters of model is {}'.format(num_params))

    Tools.print('==> Preparing data..')
    transforms_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    ])

    dataset_train = CIFAR10(root=root, train=True, download=True,
                            transform=transforms_train)
    train_sampler = torch.utils.data.distributed.DistributedSampler(dataset_train)
    train_loader = DataLoader(dataset_train, batch_size=batch_size,
                              shuffle=(train_sampler is None),
                              num_workers=num_workers, sampler=train_sampler)

    criterion = nn.CrossEntropyLoss().cuda(gpu_id)
    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=0.9, weight_decay=1e-4)
    cudnn.benchmark = True

    for epoch in range(10):
        Tools.print("epoch {}".format(epoch))
        train_sampler.set_epoch(epoch)  # reshuffle shards differently each epoch
        train(net, criterion, optimizer, train_loader, gpu_id)
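# This variant runs one process per GPU on a single node (rank == gpu_id,
# world_size == ngpus). A hedged launch sketch; the data root value is only
# illustrative, and each worker could additionally call
# dist.destroy_process_group() before returning to exit cleanly:
import torch.multiprocessing as mp

if __name__ == '__main__':
    ngpus = torch.cuda.device_count()
    mp.spawn(main_worker, nprocs=ngpus,
             args=(ngpus, "/mnt/4T/Data/data/CIFAR", args))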
def main():
    # this is the single-GPU baseline, so exactly one GPU is expected
    if args.gpu_nums != 1:
        raise ValueError("gpu_nums must be equal to 1.")

    # set run env
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    print('==> Preparing data..')
    transforms_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    ])

    dataset_train = CIFAR10(root=args.dataset_dir, train=True, download=True,
                            transform=transforms_train)
    train_loader = DataLoader(dataset_train, batch_size=args.batch_size,
                              shuffle=True, num_workers=args.num_workers)

    print('==> Making model..')
    model = pyramidnet()
    model = model.to(device)
    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('The number of parameters of model is', num_params)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.learning_rate,
                          momentum=args.momentum, weight_decay=args.weight_decay)

    for epoch in range(1, args.epochs + 1):
        train(epoch, model, criterion, optimizer, train_loader, device)

    if args.save_model:
        if not path.exists(args.train_dir):
            mkdir(args.train_dir)
        torch.save(model.state_dict(),
                   path.join(args.train_dir, "single_gpu_model.pth"))
        print("single gpu model has been saved.")
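# Everything this baseline reads off args, gathered into one argparse sketch.
# The flag names match the fields used above; the default values are
# illustrative assumptions, not taken from the source:
import argparse

parser = argparse.ArgumentParser(description='CIFAR-10 single-GPU baseline')
parser.add_argument('--gpu_nums', type=int, default=1)
parser.add_argument('--batch_size', type=int, default=128)
parser.add_argument('--num_workers', type=int, default=4)
parser.add_argument('--learning_rate', type=float, default=0.1)
parser.add_argument('--momentum', type=float, default=0.9)
parser.add_argument('--weight_decay', type=float, default=1e-4)
parser.add_argument('--epochs', type=int, default=10)
parser.add_argument('--dataset_dir', type=str, default='../data')
parser.add_argument('--train_dir', type=str, default='./save_model')
parser.add_argument('--save_model', action='store_true')
args = parser.parse_args()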
def main(root="/mnt/4T/Data/data/CIFAR"):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    Tools.print('==> Preparing data..')
    transforms_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    ])

    dataset_train = CIFAR10(root=root, train=True, download=True,
                            transform=transforms_train)
    train_loader = DataLoader(dataset_train, batch_size=args.batch_size,
                              shuffle=True, num_workers=args.num_worker)

    # there are 10 classes so the dataset name is cifar-10
    classes = ('plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck')

    Tools.print('==> Making model..')
    net = pyramidnet()

    #############################################################
    net = nn.DataParallel(net)
    cudnn.benchmark = True
    #############################################################

    net = net.to(device)
    num_params = sum(p.numel() for p in net.parameters() if p.requires_grad)
    Tools.print('The number of parameters of model is {}'.format(num_params))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=0.9, weight_decay=1e-4)
    # optimizer = optim.Adam(net.parameters(), lr=args.lr)

    for epoch in range(10):
        Tools.print("epoch {}".format(epoch))
        train(net, criterion, optimizer, train_loader, device)
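# nn.DataParallel splits each input batch across all visible GPUs and gathers
# the outputs back on device 0. To pin it to specific cards rather than every
# visible one, device_ids can be passed explicitly; the indices below are
# illustrative, not from the source:
net = nn.DataParallel(net, device_ids=[0, 1], output_device=0)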
def main():
    best_acc = 0
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    print('==> Preparing data..')
    transforms_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    ])

    dataset_train = CIFAR10(root='../data', train=True, download=True,
                            transform=transforms_train)
    train_loader = DataLoader(dataset_train, batch_size=args.batch_size,
                              shuffle=True, num_workers=args.num_worker)

    # there are 10 classes so the dataset name is cifar-10
    classes = ('plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck')

    print('==> Making model..')
    net = pyramidnet()
    net = parallel.DataParallelModel(net)
    net = net.to(device)
    num_params = sum(p.numel() for p in net.parameters() if p.requires_grad)
    print('The number of parameters of model is', num_params)

    criterion = nn.CrossEntropyLoss()
    criterion = parallel.DataParallelCriterion(criterion, device_ids=[0, 1])
    # criterion = criterion.to(device)

    # optimizer = optim.Adam(net.parameters(), weight_decay=1e-4, lr=args.lr)
    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=0.9, weight_decay=1e-4)

    train(net, criterion, optimizer, train_loader, device)
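# DataParallelModel returns one output tensor per GPU, and DataParallelCriterion
# scatters the targets to match, so the loss is computed on each device instead
# of gathering all logits on GPU 0 (the PyTorch-Encoding pattern this custom
# `parallel` module appears to follow). A sketch of a train step under that
# assumption; train_step is a hypothetical helper, not from the source:
def train_step(net, criterion, optimizer, inputs, targets):
    optimizer.zero_grad()
    outputs = net(inputs)               # list of per-GPU outputs
    loss = criterion(outputs, targets)  # per-GPU losses, reduced to a scalar
    loss.backward()
    optimizer.step()
    return loss.item()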
def main():
    # set run env: this variant requires more than one GPU for DataParallel
    if args.gpu_nums > 1:
        # CUDA_VISIBLE_DEVICES must be set before the CUDA runtime is
        # initialized, so do it before the first torch.cuda call
        gpu_ids = ','.join([str(id) for id in range(args.gpu_nums)])
        environ["CUDA_VISIBLE_DEVICES"] = gpu_ids
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
    else:
        raise ValueError("gpu-nums must be greater than 1.")

    print('==> Preparing data..')
    transforms_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

    dataset_train = CIFAR10(root='/home/zhaopp5', train=True, download=True,
                            transform=transforms_train)
    train_loader = DataLoader(dataset_train, batch_size=args.batch_size,
                              shuffle=True, num_workers=args.num_workers)

    print('==> Making model..')
    model = pyramidnet()
    if args.gpu_nums > 1:
        model = nn.DataParallel(model)
    model = model.to(device)
    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('The number of parameters of model is', num_params)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.learning_rate,
                          momentum=args.momentum, weight_decay=args.weight_decay)

    for epoch in range(1, args.epochs + 1):
        train(epoch, model, criterion, optimizer, train_loader, device)

    if args.save_model:
        if not path.exists(args.train_dir):
            mkdir(args.train_dir)
        torch.save(
            model.state_dict(),
            path.join(args.train_dir, "data_parallel_model.pth")
        )
        print("data parallel model has been saved.")
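# One caveat worth a sketch: nn.DataParallel wraps the network under a
# `module.` namespace, so the checkpoint written above has keys like
# "module.conv1.weight". Saving the unwrapped weights instead keeps the file
# loadable by the single-GPU script as well:
torch.save(model.module.state_dict(),
           path.join(args.train_dir, "data_parallel_model.pth"))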
# The statement opening this fragment is truncated in the source; the test-split
# definition below is reconstructed from context (the root path is assumed to
# match the sibling snippets).
dataset_test = CIFAR10(root='../data', train=False,
                       download=True, transform=transforms_test)
train_loader = DataLoader(dataset_train, batch_size=args.batch_size,
                          shuffle=True, num_workers=args.num_worker)
test_loader = DataLoader(dataset_test, batch_size=args.batch_size_test,
                         shuffle=False, num_workers=args.num_worker)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

net = pyramidnet()
net = net.to('cuda')
num_params = sum(p.numel() for p in net.parameters() if p.requires_grad)

if args.resume is not None:
    checkpoint = torch.load('./save_model/' + args.resume)
    net.load_state_dict(checkpoint['net'])

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.1,
                      momentum=0.9, weight_decay=1e-4)

# decay at fixed iteration counts rather than epochs
decay_epoch = [32000, 48000]
step_lr_scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=decay_epoch,
                                             gamma=0.1)  # call truncated in the source; gamma=0.1 is an assumption
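# The fragment builds a test_loader but the evaluation loop itself is not shown.
# A minimal sketch of what it could look like; the function name and its
# internals are assumptions, not from the source:
def test(net, test_loader):
    net.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to('cuda'), targets.to('cuda')
            outputs = net(inputs)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    return 100 * correct / total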
def main():
    best_acc = 0  # not sure what this is for yet (unused below)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'  # use CUDA if available, otherwise fall back to CPU

    print('==> Preparing data..')
    # data preparation uses torchvision's transforms, PyTorch's preprocessing
    # package; Compose chains several transforms, here random-crop/flip
    # augmentation plus tensor conversion and normalization
    transforms_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

    # the actual data lives here: CIFAR10 ships with torchvision and is
    # downloaded on first use; the test split can be fetched separately
    dataset_train = CIFAR10(root='../data', train=True, download=True,
                            transform=transforms_train)
    # the loader wraps the training set with the configured batch size,
    # shuffling, and number of workers (CPU processes)
    train_loader = DataLoader(dataset_train, batch_size=args.batch_size,
                              shuffle=True, num_workers=args.num_worker)

    # there are 10 classes so the dataset name is cifar-10
    classes = ('plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck')

    print('==> Making model..')
    net = pyramidnet()  # the PyramidNet architecture, imported from model.py in the same folder
    net = net.to(device)  # move the model onto the device (CUDA)
    # count the trainable parameters, just to report the model size
    num_params = sum(p.numel() for p in net.parameters() if p.requires_grad)
    print('The number of parameters of model is', num_params)

    criterion = nn.CrossEntropyLoss()  # criterion is the loss function: cross-entropy from torch.nn
    # SGD with the configured learning rate, plus momentum and weight decay
    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=0.9, weight_decay=1e-4)

    train(net, criterion, optimizer, train_loader, device)  # train() is defined below


def train(net, criterion, optimizer, train_loader, device):
    net.train()

    train_loss = 0
    correct = 0
    total = 0

    epoch_start = time.time()
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        start = time.time()

        inputs = inputs.to(device)
        targets = targets.to(device)
        outputs = net(inputs)
        loss = criterion(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        acc = 100 * correct / total
        batch_time = time.time() - start

        if batch_idx % 20 == 0:
            print('Epoch: [{}/{}]| loss: {:.3f} | acc: {:.3f} | batch time: {:.3f}s '.format(
                batch_idx, len(train_loader), train_loss / (batch_idx + 1), acc, batch_time))

    elapse_time = time.time() - epoch_start
    elapse_time = datetime.timedelta(seconds=elapse_time)
    print("Training time {}".format(elapse_time))


if __name__ == '__main__':
    main()
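# The per-batch timer in train() measures host time, but CUDA kernels run
# asynchronously, so short batches can appear faster than they really are.
# A hedged helper sketch that synchronizes around the step (timed_step is
# hypothetical, not from the source):
def timed_step(step_fn):
    """Run one training step and return its wall time with CUDA synchronized."""
    torch.cuda.synchronize()
    start = time.time()
    step_fn()  # forward, backward, and optimizer step
    torch.cuda.synchronize()
    return time.time() - start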
def main_worker(gpu, ngpus_per_node, args):
    # init the process group
    dist.init_process_group(backend=args.dist_backend, init_method=args.init_method,
                            world_size=args.world_size, rank=args.rank)
    torch.cuda.set_device(gpu)
    print("From Rank: {}, Use GPU: {} for training".format(args.rank, gpu))

    print('From Rank: {}, ==> Making model..'.format(args.rank))
    net = pyramidnet()
    net.cuda(gpu)
    args.batch_size = int(args.batch_size / ngpus_per_node)
    print("batch_size: ", args.batch_size)
    net = torch.nn.parallel.DistributedDataParallel(net, device_ids=[gpu],
                                                    output_device=gpu)
    num_params = sum(p.numel() for p in net.parameters() if p.requires_grad)
    print('From Rank: {}, The number of parameters of model is'.format(args.rank),
          num_params)

    print('From Rank: {}, ==> Preparing data..'.format(args.rank))
    transforms_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    ])

    dataset_train = CIFAR10(root=args.dataset_dir, train=True, download=True,
                            transform=transforms_train)
    train_sampler = torch.utils.data.distributed.DistributedSampler(dataset_train)
    train_loader = DataLoader(dataset_train, batch_size=args.batch_size,
                              shuffle=(train_sampler is None),
                              num_workers=args.num_workers, sampler=train_sampler)

    criterion = nn.CrossEntropyLoss().cuda(gpu)
    optimizer = optim.SGD(net.parameters(), lr=args.learning_rate,
                          momentum=args.momentum, weight_decay=args.weight_decay)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)

    for epoch in range(1, args.epochs + 1):
        train_sampler.set_epoch(epoch)  # reshuffle shards differently each epoch
        train(epoch, net, criterion, optimizer, train_loader, args.rank)
        scheduler.step()

    if args.save_model:
        if not path.exists(args.train_dir):
            mkdir(args.train_dir)
        # if args.rank == 0:
        torch.save(net.module.state_dict(),
                   path.join(args.train_dir,
                             "distributed_data_parallel_{}.pth".format(args.rank)))
        print("From Rank: {}, model saved.".format(args.rank))
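# This worker takes its global rank directly from args.rank, so the simplest
# launch model is one explicitly started process per GPU. A sketch of the entry
# point; mapping the global rank to a local GPU index this way is an assumption:
if __name__ == '__main__':
    args = parser.parse_args()
    ngpus_per_node = torch.cuda.device_count()
    # args.rank, args.world_size and args.init_method (e.g. "tcp://host:port")
    # are expected on the command line, one unique rank per process
    main_worker(args.rank % ngpus_per_node, ngpus_per_node, args)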
def main():
    args = parser.parse_args()

    # init the process group
    dist.init_process_group(backend=args.dist_backend, init_method=args.init_method,
                            world_size=args.world_size, rank=args.rank)

    # set cuda devices for use
    gpu_devices = ','.join([str(id) for id in args.gpu_devices])
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_devices
    print("From Rank: {}, Use GPU: {} for training".format(args.rank, gpu_devices))

    print('From Rank: {}, ==> Making model..'.format(args.rank))
    net = pyramidnet()
    net.cuda()
    args.batch_size = int(args.batch_size / args.world_size)
    args.num_workers = int(args.num_workers / args.world_size)
    # one process drives several GPUs here (an older DDP style); the current
    # recommendation is one process per GPU as in the previous snippet
    net = torch.nn.parallel.DistributedDataParallel(
        net, device_ids=args.gpu_devices, output_device=args.gpu_devices[0])
    num_params = sum(p.numel() for p in net.parameters() if p.requires_grad)
    print('From Rank: {}, The number of parameters of model is'.format(args.rank),
          num_params)

    print('From Rank: {}, ==> Preparing data..'.format(args.rank))
    transforms_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    ])

    dataset_train = CIFAR10(root='./data', train=True, download=True,
                            transform=transforms_train)
    train_sampler = torch.utils.data.distributed.DistributedSampler(dataset_train)
    train_loader = DataLoader(dataset_train, batch_size=args.batch_size,
                              shuffle=(train_sampler is None),
                              num_workers=args.num_workers, sampler=train_sampler)

    # there are 10 classes so the dataset name is cifar-10
    classes = ('plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck')

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=args.lr,
                          momentum=0.9, weight_decay=1e-4)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)

    for epoch in range(args.max_epochs):
        train_sampler.set_epoch(epoch)  # reshuffle shards differently each epoch
        train(epoch, net, criterion, optimizer, train_loader, args.rank)
        scheduler.step()

    # if args.rank == 0:
    torch.save(net.module.state_dict(),
               "final_model_rank_{}.pth".format(args.rank))
    print("From Rank: {}, model saved.".format(args.rank))
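# The commented-out rank guard above hints at the usual convention: write one
# checkpoint from rank 0 and keep the other ranks alive until it is on disk.
# A sketch of that alternative:
if args.rank == 0:
    torch.save(net.module.state_dict(), "final_model.pth")
dist.barrier()  # other ranks wait here until rank 0 has finished saving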