Example #1
0
def train_model(model, criterion, train_loader, ps_stub):
    """Run one training epoch driven by a parameter-server stub.

    For every minibatch: fetch the current weights from the parameter
    server, run forward/backward locally, and ship the resulting
    gradients back to the server. No local optimizer step is taken —
    the server owns the update rule.

    Args:
        model: torch.nn.Module to train (moved to GPU if available).
        criterion: loss function, e.g. nn.CrossEntropyLoss.
        train_loader: DataLoader yielding (x, y) minibatches.
        ps_stub: parameter-server stub exposing GetModel(ModelRequest)
            and UpdateGradients(proto).
    """
    start_time = time.time()

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)
    model = model.to(device)

    # Training for 1 epoch
    cum_loss = 0.0
    correct = 0
    model.train()

    for x, y in tqdm(train_loader):
        x, y = x.to(device), y.to(device)

        # Sync local weights with the parameter server before each step.
        print("Fetching model")
        model_proto = ps_stub.GetModel(ModelRequest())
        load_proto(model, model_proto)
        print("Model fetched")

        # BUG FIX: optimizer.zero_grad() was commented out and nothing else
        # cleared .grad, so backward() kept ACCUMULATING gradients across
        # minibatches — every UpdateGradients after the first sent a running
        # sum of all previous batches' gradients. Clear them explicitly here.
        model.zero_grad()

        outputs = model(x)
        loss = criterion(outputs, y)

        loss.backward()
        # No optimizer.step(): the parameter server applies the update.

        print("Sending gradients")
        ps_stub.UpdateGradients(gradients_to_proto(model))
        print("Gradients sent")

        # Metrics only — no autograd graph needed.
        with torch.no_grad():
            _, pred = outputs.max(1)
            correct += (pred == y).sum().item()
            cum_loss += loss.item()

    n_train = len(train_loader.dataset)
    print(f"Finished in {time.time() - start_time} seconds.")
    print(f"Train acc={correct / n_train}, train loss={cum_loss / n_train}.")
Example #2
0
# Data loaders: shuffle the training stream each epoch; keep the eval
# order fixed. num_workers=0 loads in the main process (simplest/debuggable).
train_loader = DataLoader(mnist_train,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0)
val_loader = DataLoader(mnist_test,
                        batch_size=batch_size,
                        shuffle=False,
                        num_workers=0)

### MODEL DEFINITION
model = create_model()

# NOTE(review): this optimizer is never stepped by train_model in this
# example — the MockPS applies the updates with its own lr.
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()

# One shared mock parameter server; its own fresh model holds the
# authoritative weights that every worker syncs from.
mock_ps = MockPS(create_model(), lr=0.001)

# Train for 1 epoch
train_model(model, criterion, optimizer, train_loader, mock_ps)

# Create new model and train
# The fresh model pulls the server's weights on its first minibatch,
# so it continues from where the previous run left off.
model2 = create_model()
optimizer2 = optim.SGD(model2.parameters(), lr=0.001, momentum=0.9)
train_model(model2, criterion, optimizer2, train_loader, mock_ps)

# Third worker: pre-load the server's current weights before training.
model3 = create_model()
optimizer3 = optim.SGD(model3.parameters(), lr=0.001, momentum=0.9)
load_proto(model3, mock_ps.get_model())

# Two more epochs against the same shared server.
train_model(model3, criterion, optimizer3, train_loader, mock_ps)
train_model(model3, criterion, optimizer3, train_loader, mock_ps)
Example #3
0
# Inspect the model's parameter list and the gradient of its first tensor.
# (params and model are defined earlier in the file.)
print(len(params))
print(params[0].shape)
# for param in model.parameters():
print("====Gradients")
print(params[0].grad)

# Minimal autograd walkthrough: build a tiny graph and backprop through it.
x = torch.ones(2, 2, requires_grad=True)
print(x)

y = x * x + 2

print(y)
print(y.grad_fn)

out = y.mean()

print(out.grad_fn)

# d(out)/dx = 2x/4 = 0.5 for each element of the all-ones x.
out.backward()
print(x.grad)
# y is a non-leaf tensor, so y.grad is expected to be None
# (PyTorch only retains gradients on leaves unless retain_grad() is called).
print(y.grad)

# Round-trip a gradient tensor through pickle — sanity check for the
# proto-based serialization below.
print(pickle.dumps(x.grad))
print(pickle.loads(pickle.dumps(x.grad)))

# Serialize the model's weights to a protobuf and load them back.
proto = model_to_proto(model)
print(len(proto.weights))

load_proto(model, proto)

# print(dir(pickle.loads(model_proto.weights[0].value)))
Example #4
0
### MODEL DEFINITION
model = create_model()

optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()

# Dump the architecture and a verbose proto serialization for inspection.
print(model)
model_proto = model_to_proto(model, log=True)
# print(len(model_proto.weights))
# print(pickle.loads(model_proto.weights[0].value))

print([name for name, _ in model.named_parameters()])

# Train for 1 epoch and save to proto
# NOTE(review): train_model here takes (model, criterion, optimizer,
# train_loader) — a local-optimizer variant, not the parameter-server one.
train_model(model, criterion, optimizer, train_loader)
model_proto = model_to_proto(model)

# Create new model and load from protobuf
model2 = create_model()
optimizer2 = optim.SGD(model2.parameters(), lr=0.001, momentum=0.9)
load_proto(model2, model_proto)
# Verifies the proto round-trip reproduced the trained weights exactly.
print("Models are the same?", compare_models(model, model2))
train_model(model2, criterion, optimizer2, train_loader)

# Reset to 1 epoch completed
# Re-loading the saved proto rolls model2 back to the 1-epoch checkpoint.
load_proto(model2, model_proto)
train_model(model2, criterion, optimizer2, train_loader)

# Train for another epoch
train_model(model2, criterion, optimizer2, train_loader)