Example #1
import torch
import torch.optim as optim
from torch.nn.parallel import DistributedDataParallel
from torch.utils.data import DataLoader

# Project-local helpers assumed by this snippet: Model, RandomDataset, setup, cleanup.

def run_process():
    '''Run process

    This is what is actually run on each process.
    '''
    # Setup this process
    local_rank, rank, world_size = setup(verbose=True)

    # Initialize data_loader
    input_size = 5
    output_size = 1
    batch_size = 30
    data_size = 100

    # Note: with DDP, a torch.utils.data.DistributedSampler is normally used so
    # that each process sees a distinct shard; shuffle=True alone means every
    # process iterates over the full dataset.
    data_loader = DataLoader(
        dataset=RandomDataset(input_size, data_size),
        batch_size=batch_size,
        shuffle=True,
    )

    # Initialize model and attach to optimizer
    model = Model(input_size, output_size, verbose=False)

    device = torch.device(f"cuda:{local_rank}")
    model.to(device)

    opt = optim.SGD(model.parameters(), lr=0.01)

    # Parallelize
    model = DistributedDataParallel(
        model,
        device_ids=[device],
        output_device=device,
    )

    # Actual training
    n_epochs = 10
    for epoch in range(n_epochs):
        model.train()
        for data, target in data_loader:
            opt.zero_grad()

            inputs = data.to(device)  # avoid shadowing the built-in name input
            target = target.to(device)
            output = model(inputs)

            # Mean squared error; .mean() reduces to a scalar so .backward()
            # works for any output_size.
            loss = (output - target).pow(2).mean()
            loss.backward()
            opt.step()

        if rank == 0:
            print(epoch)

    # Cleanup process
    cleanup()

    return model
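
The setup and cleanup helpers referenced above are not shown. A minimal sketch, assuming the script is launched with torchrun (which sets the LOCAL_RANK, RANK, and WORLD_SIZE environment variables for each worker):

import os
import torch
import torch.distributed as dist

def setup(verbose=False):
    # Read the per-process identity that torchrun exports.
    local_rank = int(os.environ["LOCAL_RANK"])
    rank = int(os.environ["RANK"])
    world_size = int(os.environ["WORLD_SIZE"])
    torch.cuda.set_device(local_rank)
    dist.init_process_group(backend="nccl")
    if verbose:
        print(f"rank {rank}/{world_size} (local rank {local_rank}) initialized")
    return local_rank, rank, world_size

def cleanup():
    dist.destroy_process_group()

With such helpers the example would be launched as, e.g., torchrun --nproc_per_node=4 train.py, where train.py is a hypothetical entry point that calls run_process().
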
Example #2
def checkOverfit(num_samples, input_dims, hidden_dims, labels, num_tries=3):
    '''Sanity check: a working model should drive training error to zero
    on a small random dataset it is allowed to memorize.
    '''
    for _ in range(num_tries):
        random_dataset = RandomDataset(num_samples=num_samples, input_dims=input_dims, useLabels=True, labels=labels)
        
        model = SiameseNetwork(input_dims=input_dims, hidden_dims=hidden_dims, doConv=False)
        
        trainer = Trainer(random_dataset, model=model, model_parameters=model.parameters,
                            batch_size=8, lr=1, shuffle=True, doValidation=False)
        
        trainer.train(num_epochs=30)  # This should print status
        
        if trainer.training_error_plot[-1] == 0:
            return True
    return False
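
A hypothetical invocation of this check (argument values are illustrative only; SiameseNetwork, Trainer, and RandomDataset are the project's own classes):

if not checkOverfit(num_samples=16, input_dims=10, hidden_dims=32, labels=[0, 1]):
    raise RuntimeError("model failed to overfit a tiny random dataset; suspect a bug")
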
Example #3
import torch.nn as nn
import torch
from torch.utils.data import Dataset, DataLoader
from model import MYmodel
from dataset import RandomDataset

if __name__ == "__main__":
    # init parameters
    input_size = 5
    output_size = 2
    batch_size = 30
    data_size = 100

    # get dataloaders
    rand_loader = DataLoader(dataset=RandomDataset(input_size, data_size),
                             batch_size=batch_size,
                             shuffle=True)

    # get device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # get model
    model = MYmodel(input_size, output_size)
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        # dim = 0: a [30, ...] batch is split into [10, ...] chunks across 3 GPUs
        model = nn.DataParallel(model)
    else:
        # With a single GPU (or none), device is cuda:0 or cpu, respectively.
        print(f"Let's use {device}!")

    model.to(device)
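
The snippet stops after moving the model to the device. A minimal forward pass over rand_loader, continuing the __main__ block above in the style of the PyTorch DataParallel tutorial this example mirrors (assuming RandomDataset yields bare input tensors), could be:

    for data in rand_loader:
        inputs = data.to(device)
        output = model(inputs)
        print("Outside: input size", inputs.size(), "output size", output.size())
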
Example #4
def test_dataset():
    dataset = RandomDataset(args)
    sampleM = dataset.sample_M(args.batch_size, args.dim, args.miss_p)
    trainset, testset, start, rare_indexes, rate = dataset.load_snp()
    print(trainset.shape, testset.shape, start, rare_indexes.shape, rate)
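
sample_M is project code not shown here; its signature matches GAIN-style imputation utilities, where it draws a random binary missingness mask. A hypothetical sketch under that assumption (written as a free function rather than the dataset method used above):

import numpy as np

def sample_M(batch_size, dim, p):
    # 1 = observed, 0 = missing; each entry is missing with probability p.
    A = np.random.uniform(0.0, 1.0, size=(batch_size, dim))
    return (A > p).astype(float)
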
Example #5
import torch
import torch.optim as optim
import horovod.torch as hvd
from torch.utils.data import DataLoader

# Project-local helpers assumed by this snippet: Model, RandomDataset, setup, cleanup.

def run_process():
    '''Run process

    This is what is actually run on each process.
    '''
    # Setup this process
    setup(verbose=True)

    # Initialize data_loader
    input_size = 5
    output_size = 1
    batch_size = 30
    data_size = 100

    data_loader = DataLoader(
        dataset=RandomDataset(input_size, data_size),
        batch_size=batch_size,
        shuffle=True,
    )

    # Initialize model and attach to optimizer
    model = Model(input_size, output_size, verbose=False)

    device = torch.device(f"cuda:{hvd.local_rank()}")
    model.to(device)

    opt = optim.SGD(model.parameters(), lr=0.01)

    # Parallelize
    # Broadcast parameters & optimizer state.
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)
    hvd.broadcast_optimizer_state(opt, root_rank=0)

    # Wrap optimizer with DistributedOptimizer.
    opt = hvd.DistributedOptimizer(
        opt,
        named_parameters=model.named_parameters(),
    )

    # Actual training
    n_epochs = 10
    for epoch in range(n_epochs):
        model.train()
        for data, target in data_loader:
            opt.zero_grad()

            inputs = data.to(device)  # avoid shadowing the built-in name input
            target = target.to(device)
            output = model(inputs)

            # Mean squared error reduced to a scalar for .backward().
            loss = (output - target).pow(2).mean()
            loss.backward()
            opt.step()

        if hvd.rank() == 0:
            print(epoch)

    # Cleanup process
    cleanup()

    return model
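
Here setup() presumably initializes Horovod rather than torch.distributed. A minimal sketch under that assumption:

import horovod.torch as hvd
import torch

def setup(verbose=False):
    hvd.init()                               # start Horovod
    torch.cuda.set_device(hvd.local_rank())  # pin this process to one GPU
    if verbose:
        print(f"rank {hvd.rank()}/{hvd.size()} initialized")

def cleanup():
    # Horovod tears itself down at process exit; nothing to do explicitly.
    pass

The script would be launched with the Horovod launcher, e.g., horovodrun -np 4 python train.py (train.py being a hypothetical entry point that calls run_process()).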