Example #1
from math import ceil

import torch
import torch.distributed as dist
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import transforms

# `partition` and `average_gradients` come from the surrounding project
# (see the PyTorch distributed tutorial linked below).


def run(device: torch.device, epochs: int = 1, batch_size: int = 4096) -> None:
    rank = dist.get_rank()
    model = torchvision.models.resnet50().to(device)
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([.5, .5, .5], [.5, .5, .5])
    ])
    dataset = torchvision.datasets.CIFAR10('/data/private/datasets',
                                           train=True,
                                           transform=transform,
                                           download=True)
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

    # partition.partition_dataset is the same as the one in the PyTorch tutorial:
    # https://pytorch.org/tutorials/intermediate/dist_tuto.html#distributed-training
    train_set, bsz = partition.partition_dataset(dataset, batch_size)
    num_batches = ceil(len(train_set.dataset) / float(bsz))

    for epoch in range(epochs):
        epoch_loss = 0.0
        for data, target in train_set:
            optimizer.zero_grad()
            data = data.to(device)  # all processes lock up here at the second iteration
            target = target.to(device)
            outputs = model(data)
            loss = F.cross_entropy(outputs, target)
            epoch_loss += loss.item()
            loss.backward()
            average_gradients(model)
            optimizer.step()
        print(f'Rank {rank}, epoch {epoch} : {epoch_loss / num_batches}')
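
For reference, `average_gradients` here follows the helper from the tutorial linked above: it sums each parameter's gradient across all processes with an all-reduce and divides by the world size. A minimal sketch:

def average_gradients(model):
    # Average gradients across all workers: all-reduce the sum, then divide
    world_size = float(dist.get_world_size())
    for param in model.parameters():
        dist.all_reduce(param.grad.data, op=dist.ReduceOp.SUM)
        param.grad.data /= world_size
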
Example #2
def train_teacher(nb_teachers, teacher_id):
    """
    Train the single teacher model whose ID is teacher_id within an
    ensemble of nb_teachers models on the dataset specified, and save
    the trained model to disk.
    :param nb_teachers: total number of teachers in the ensemble
    :param teacher_id: id of the teacher being trained
    :return: True if everything went well
    """
    # Load the dataset
    X_train, X_test, y_train, y_test = models.get_dataset()

    print(X_train.shape)
    print(y_train.shape)
    print(X_test.shape)
    print(y_test.shape)

    # Retrieve subset of data for this teacher
    data, labels = partition.partition_dataset(X_train, y_train, nb_teachers,
                                               teacher_id)

    print("Length of training data: " + str(len(labels)))

    # Define teacher checkpoint filename and full path

    filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.hdf5'
    filename2 = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.h5'

    # Perform teacher training

    # Create teacher model
    model, opt = models.create_two_layer_mlp(46)  # 46 = number of input columns
    model.compile(loss='binary_crossentropy',
                  optimizer="Adam",
                  metrics=['accuracy'])
    model, hist = models.training(model, data, X_test, labels, y_test,
                                  filename)

    # Serialize the model architecture to JSON
    model_json = model.to_json()
    with open("model.json", "w") as json_file:
        json_file.write(model_json)

    # Serialize weights to HDF5
    model.save_weights(filename2)
    print("Saved model to disk")
    return True
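
A hypothetical driver for this function (the loop and the ensemble size of 5 are assumptions for illustration, not part of the original code) would call it once per teacher:

nb_teachers = 5  # assumed ensemble size
for teacher_id in range(nb_teachers):
    assert train_teacher(nb_teachers, teacher_id)
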
Example #3
import torch

# `partition_dataset` and `train_model` come from the surrounding project.
def run(rank, size, partition_sizes, custom_partition=False, params=None):
    # Seed every worker identically so model initialisation matches
    torch.manual_seed(1234)
    params = params or {}  # guard against **None, which would raise a TypeError
    train_set, _ = partition_dataset(partition_sizes, train=True, custom=custom_partition)
    val_set, _ = partition_dataset(partition_sizes, train=False)
    train_model(train_set, val_set, **params)
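
A hypothetical invocation (all argument values here are assumptions for illustration):

run(rank=0, size=2,
    partition_sizes=[0.5, 0.5],
    params={'epochs': 10})  # keyword arguments forwarded to train_model
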
Example #4
# `prt` (partitioning) and `lcl` (local updates) are project-local modules.
DATASETS_PATH = "./datasets/"

if __name__ == '__main__':
    #file = "chainlink3D.arff"
    file = "banana.arff"
    M = 2

    partitioning_method = 0

    #L = 0.4
    #MIN_PTS = 4
    L = 0.03
    MIN_PTS = 4

    arf = prt.partition_dataset(file, M, partitioning_method)
    dimensions = len(arf[0][0]) - 1  # last column holds the class label

    # for i in range(M):
    #    points, labels = arff.loadpartitionNDArray(i)
    #    plt.plotCluster(points, labels, message=f'Partition {i}')

    # Merge the local updates from all M partitions into one global map
    contribution_map = {}
    for i in range(M):
        local_update = lcl.compute_local_update(i, L)
        for key, value in local_update.items():
            if key in contribution_map:
                contribution_map[key] += value
            else:
                contribution_map[key] = value
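
The merge loop can also be written with collections.Counter, which sums values for keys that appear in several updates; a sketch assuming the update values are numeric:

from collections import Counter

contribution_map = Counter()
for i in range(M):
    contribution_map.update(lcl.compute_local_update(i, L))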