def run_process():
    '''Run process

    This is what is actually run on each process.
    '''
    # Setup this process
    local_rank, rank, world_size = setup(verbose=True)

    # Initialize data_loader
    input_size = 5
    output_size = 1
    batch_size = 30
    data_size = 100
    data_loader = DataLoader(
        dataset=RandomDataset(input_size, data_size),
        batch_size=batch_size,
        shuffle=True,
    )

    # Initialize model and attach to optimizer
    model = Model(input_size, output_size, verbose=False)
    device = torch.device(f"cuda:{local_rank}")
    model.to(device)
    opt = optim.SGD(model.parameters(), lr=0.01)

    # Parallelize
    model = DistributedDataParallel(
        model,
        device_ids=[device],
        output_device=device,
    )

    # Actual training
    n_epochs = 10
    for epoch in range(n_epochs):
        model.train()
        for data, target in data_loader:
            opt.zero_grad()
            input = data.to(device)
            target = target.to(device)
            output = model(input)
            loss = (output - target).pow(2).mean(0)
            loss.backward()
            opt.step()
        if rank == 0:
            print(epoch)

    # Cleanup process
    cleanup()
    return model
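# The run_process() above relies on setup()/cleanup() helpers that are not
# shown here. Below is a minimal sketch of what they could look like with
# torch.distributed and environment-variable initialization (the helper names,
# the verbose flag, and the return values are assumptions matching the call
# sites above, not the project's actual implementation).
import os

import torch
import torch.distributed as dist


def setup(verbose=False):
    # torchrun exports LOCAL_RANK, RANK and WORLD_SIZE for every process.
    local_rank = int(os.environ["LOCAL_RANK"])
    rank = int(os.environ["RANK"])
    world_size = int(os.environ["WORLD_SIZE"])
    # Join the default process group (env:// initialization) and pin this
    # process to its local GPU.
    dist.init_process_group(backend="nccl")
    torch.cuda.set_device(local_rank)
    if verbose:
        print(f"Initialized rank {rank}/{world_size} (local rank {local_rank})")
    return local_rank, rank, world_size


def cleanup():
    # Tear down the default process group before the process exits.
    dist.destroy_process_group()

# With helpers like these, the script would typically be launched with
# something like `torchrun --nproc_per_node=<num_gpus> train_ddp.py`
# (script name is illustrative).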
def checkOverfit(num_samples, input_dims, hidden_dims, labels, num_tries=3):
    for _ in range(num_tries):
        random_dataset = RandomDataset(num_samples=num_samples,
                                       input_dims=input_dims,
                                       useLabels=True,
                                       labels=labels)
        model = SiameseNetwork(input_dims=input_dims,
                               hidden_dims=hidden_dims,
                               doConv=False)
        trainer = Trainer(random_dataset,
                          model=model,
                          model_parameters=model.parameters,
                          batch_size=8,
                          lr=1,
                          shuffle=True,
                          doValidation=False)
        trainer.train(num_epochs=30)  # This should print status
        if trainer.training_error_plot[-1] == 0:
            return True
    return False
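# Hypothetical usage of checkOverfit(): a sufficiently large network should be
# able to drive the training error on a tiny random dataset to zero. The
# argument values (and the assumption that `labels` is a list of class labels)
# are illustrative guesses, not taken from the source.
can_overfit = checkOverfit(num_samples=8, input_dims=16, hidden_dims=64,
                           labels=[0, 1], num_tries=3)
print("Able to overfit a tiny random set:", can_overfit)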
import torch.nn as nn
import torch
from torch.utils.data import Dataset, DataLoader

from model import MYmodel
from dataset import RandomDataset

if __name__ == "__main__":
    # init parameters
    input_size = 5
    output_size = 2
    batch_size = 30
    data_size = 100

    # get dataloaders
    rand_loader = DataLoader(dataset=RandomDataset(input_size, data_size),
                             batch_size=batch_size,
                             shuffle=True)

    # get device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # get model
    model = MYmodel(input_size, output_size)
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
        model = nn.DataParallel(model)
    else:
        print("Let's use cpu!")
    model.to(device)
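# The DataParallel script above imports RandomDataset from a local `dataset`
# module that is not shown. One plausible implementation, consistent with the
# RandomDataset(input_size, data_size) call in the DataLoader, is sketched
# below (this is an assumption, not the project's actual dataset code).
import torch
from torch.utils.data import Dataset


class RandomDataset(Dataset):
    """Dataset of `length` random vectors of dimension `size`."""

    def __init__(self, size, length):
        self.len = length
        self.data = torch.randn(length, size)

    def __getitem__(self, index):
        # Return a single random feature vector.
        return self.data[index]

    def __len__(self):
        return self.len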
def test_dataset():
    dataset = RandomDataset(args)
    sampleM = dataset.sample_M(args.batch_size, args.dim, args.miss_p)
    trainset, testset, start, rare_indexes, rate = dataset.load_snp()
    print(trainset.shape, testset.shape, start, rare_indexes.shape, rate)
def run_process():
    '''Run process

    This is what is actually run on each process.
    '''
    # Setup this process
    setup(verbose=True)

    # Initialize data_loader
    input_size = 5
    output_size = 1
    batch_size = 30
    data_size = 100
    data_loader = DataLoader(
        dataset=RandomDataset(input_size, data_size),
        batch_size=batch_size,
        shuffle=True,
    )

    # Initialize model and attach to optimizer
    model = Model(input_size, output_size, verbose=False)
    device = torch.device(f"cuda:{hvd.local_rank()}")
    model.to(device)
    opt = optim.SGD(model.parameters(), lr=0.01)

    # Parallelize
    # Broadcast parameters & optimizer state.
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)
    hvd.broadcast_optimizer_state(opt, root_rank=0)
    # Wrap optimizer with DistributedOptimizer.
    opt = hvd.DistributedOptimizer(
        opt,
        named_parameters=model.named_parameters(),
    )

    # Actual training
    n_epochs = 10
    for epoch in range(n_epochs):
        model.train()
        for data, target in data_loader:
            opt.zero_grad()
            input = data.to(device)
            target = target.to(device)
            output = model(input)
            loss = (output - target).pow(2).mean(0)
            loss.backward()
            opt.step()
        if hvd.rank() == 0:
            print(epoch)

    # Cleanup process
    cleanup()
    return model
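# The Horovod variant above also assumes setup()/cleanup() helpers. A minimal
# sketch under that assumption is shown below (the helper names and behaviour
# are assumptions matching the call sites, not the project's actual code).
import horovod.torch as hvd
import torch


def setup(verbose=False):
    # Initialize Horovod and pin this process to its local GPU.
    hvd.init()
    torch.cuda.set_device(hvd.local_rank())
    if verbose:
        print(f"Horovod rank {hvd.rank()}/{hvd.size()} "
              f"(local rank {hvd.local_rank()})")


def cleanup():
    # Shut down Horovod before the process exits.
    hvd.shutdown()

# With helpers like these, the script would typically be launched with
# something like `horovodrun -np <num_gpus> python train_hvd.py`
# (script name is illustrative).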