Example #1
            loss.backward()
            optimizers[worker].step()
            wcounter += 1
            if wcounter == tau:
                break
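    # The lines above are the tail of each worker's local-update loop: every worker
    # takes tau local SGD steps per round r before the averaging step below. A sketch
    # of the assumed enclosing structure (trainloaders and num_rounds are hypothetical
    # names, not from the source):
    #
    #     for r in range(num_rounds):
    #         for worker in range(N_w):
    #             wcounter = 0
    #             for data in trainloaders[worker]:
    #                 inputs, labels = data[0].to(device), data[1].to(device)
    #                 optimizers[worker].zero_grad()
    #                 loss = criterions[worker](nets[worker](inputs), labels)
    #                 ...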

    # pick this round's adjacent pair (avg_index, avg_index + 1); the pair cycles
    # through all even/odd pairs as r increases
    avg_index = (r % (N_w // 2)) * 2
    ps_functions.average_model(nets[avg_index], nets[avg_index + 1])
    ps_functions.synch_weight(nets[avg_index + 1], nets[avg_index])
    for n in range(N_w):
        if n != avg_index and n != avg_index + 1:
            ps_functions.average_model2(nets[n], nets[avg_index])
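    # The ps_functions helpers above are not defined in this listing; a minimal
    # sketch of what the call sites imply (an assumption, not the repository's
    # actual implementation; average_model2 is presumably a weighted variant whose
    # exact weights are not recoverable from the call site):
    #
    #     def average_model(net_a, net_b):
    #         # in place: net_a <- (net_a + net_b) / 2
    #         with torch.no_grad():
    #             for pa, pb in zip(net_a.parameters(), net_b.parameters()):
    #                 pa.copy_((pa + pb) / 2)
    #
    #     def synch_weight(dst, src):
    #         # in place: dst <- src
    #         with torch.no_grad():
    #             for pd, ps in zip(dst.parameters(), src.parameters()):
    #                 pd.copy_(ps)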
    if (r * tau) % 120 == 0:
        ps_functions.initialize_zero(ps_model)
        for n in range(N_w):
            ps_functions.weight_accumulate(nets[n], ps_model, N_w)
        correct = 0
        total = 0
        with torch.no_grad():
            for data in testloader:
                images, labels = data
                images, labels = images.to(device), labels.to(device)
                outputs = ps_model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print('Accuracy of the network on the 10000 test images: %d %%' %
              (100 * correct / total))
        results[0][res_ind] = 100 * correct / total
        res_ind += 1
Example #2
                    gradient_average(nets[c][n], ps_model, lr)

                i = 0
                c += 1

            if c == num_cl:
                c = 0
                per += 1
                iter_ind += 1
            if per == period:

                ps_functions.ps_param_zero(ps_model)

                for cl in range(num_cl):
                    # ps_functions.gradient_accumulate(old_nets[cl][0], nets[cl][0], ps_model, is_avg, num_cl)
                    ps_functions.weight_accumulate(nets[cl][0], ps_model,
                                                   num_cl)

                # ps_functions.sparse_grad(top_k_ps, ps_model, device)

                for cl in range(num_cl):
                    for n in range(num_w_per_cluster):
                        # old_nets[cl][n] = ps_functions.gradient_average(nets[cl][n], ps_model, scale)
                        ps_functions.weight_broadcast(nets[cl][n], ps_model)

                per = 0
                i = 0
                c = 0
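            # ps_param_zero and weight_broadcast are not shown in this listing; the
            # call sites suggest ps_param_zero zeroes ps_model's parameters and
            # weight_broadcast copies ps_model's parameters into a worker model, e.g.
            # (a sketch under that assumption, not the actual ps_functions code):
            #
            #     def weight_broadcast(dst, src):
            #         with torch.no_grad():
            #             for pd, ps in zip(dst.parameters(), src.parameters()):
            #                 pd.copy_(ps)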
            ##### Change lr for the next iteration during the warm-up phase #####
            ps_functions.warmup_lr(optimizers, num_cl, num_w_per_cluster, lr,
                                   iter_ind, max_ind)
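The warmup_lr call above adjusts the learning rate of every worker's optimizer during
the warm-up phase. Its implementation is not shown in this listing; the following is a
minimal linear warm-up consistent with the call signature (an assumption, not the
source's actual schedule, and assuming optimizers is indexed as optimizers[cl][n]):

def warmup_lr(optimizers, num_cl, num_w_per_cluster, lr, iter_ind, max_ind):
    # Linearly ramp each worker's learning rate from 0 up to lr over the first
    # max_ind iterations, then hold it at lr.
    scale = min(1.0, iter_ind / max_ind)
    for cl in range(num_cl):
        for n in range(num_w_per_cluster):
            for group in optimizers[cl][n].param_groups:
                group['lr'] = lr * scale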
Example #3
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizers[worker].zero_grad()
            preds = nets[worker](inputs)
            loss = criterions[worker](preds, labels)
            loss.backward()
            ps_functions.synch_weight(reserveNets[worker], nets[worker])
            break

    for worker in range(N_w):
        # Each worker rebuilds its model as the average of 5 reserve models: its own
        # snapshot plus the 4 neighbors at even ring distances (-4, -2, +2, +4).
        ps_functions.initialize_zero(nets[worker])
        index = (worker - 4) % N_w
        for i in range(5):
            ps_functions.weight_accumulate(reserveNets[(index + i * 2) % N_w],
                                           nets[worker], 5)
        optimizers[worker].step()
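    # initialize_zero and weight_accumulate are not defined in this listing; the call
    # sites imply the following semantics (a sketch under that assumption):
    #
    #     def initialize_zero(net):
    #         with torch.no_grad():
    #             for p in net.parameters():
    #                 p.zero_()
    #
    #     def weight_accumulate(src, dst, scale):
    #         # dst += src / scale, so accumulating N_w models with scale=N_w
    #         # leaves dst holding their average
    #         with torch.no_grad():
    #             for ps, pd in zip(src.parameters(), dst.parameters()):
    #                 pd.add_(ps / scale)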

    # w_index sends its model to the other workers;
    # the other workers, upon receiving the model, average it with their own
    for n in range(N_w):
        if n != w_index:
            ps_functions.average_model(nets[n], nets[w_index])

    if (r % 100) == 0 and r != 0:
        # reset the running-average model
        ps_functions.initialize_zero(avg_model)

        # accumulate the average of all worker models
        for worker in range(N_w):
            ps_functions.weight_accumulate(nets[worker], avg_model, N_w)
Example #4
    for n in range(N_w):
        if n != w_index:
            ps_functions.average_model(nets[n], nets[w_index])

    # averaging the momentums
    for n in range(N_w):
        if n != w_index:
            ps_functions.average_momentum(optimizers[n], optimizers[w_index])

    if (r % 100) == 0 and r != 0:
        # reset the running-average model and its momentum buffers
        ps_functions.initialize_zero(avg_model)  # model
        ps_functions.momentum_zero(avg_Optimizer)  # momentum

        # accumulate the average of all worker models and momentum buffers
        for worker in range(N_w):
            ps_functions.weight_accumulate(nets[worker], avg_model,
                                           N_w)  # model
            ps_functions.momentum_accumulate(avg_Optimizer, optimizers[worker],
                                             N_w)  # momentum
        # assign the averages back to every worker
        for worker in range(N_w):
            ps_functions.synch_weight(nets[worker], avg_model)  # model
            ps_functions.momentum_Avg(avg_Optimizer,
                                      optimizers[worker])  # momentum
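    # The momentum helpers are assumed to operate on the SGD momentum buffers kept in
    # optimizer.state; a sketch of the implied semantics (an assumption, since
    # ps_functions is not shown):
    #
    #     def momentum_accumulate(avg_opt, opt, scale):
    #         # avg buffer += worker buffer / scale
    #         for g_avg, g in zip(avg_opt.param_groups, opt.param_groups):
    #             for p_avg, p in zip(g_avg['params'], g['params']):
    #                 buf = opt.state[p].get('momentum_buffer')
    #                 if buf is not None:
    #                     avg_state = avg_opt.state[p_avg]
    #                     if 'momentum_buffer' not in avg_state:
    #                         avg_state['momentum_buffer'] = torch.zeros_like(buf)
    #                     avg_state['momentum_buffer'].add_(buf / scale)
    #
    #     def momentum_Avg(avg_opt, opt):
    #         # copy the averaged buffer back into a worker's optimizer
    #         for g_avg, g in zip(avg_opt.param_groups, opt.param_groups):
    #             for p_avg, p in zip(g_avg['params'], g['params']):
    #                 buf = avg_opt.state[p_avg].get('momentum_buffer')
    #                 if buf is not None:
    #                     opt.state[p]['momentum_buffer'] = buf.clone()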
    if r % 100 == 0:
        ps_functions.initialize_zero(ps_model)
        for n in range(N_w):
            ps_functions.weight_accumulate(nets[n], ps_model, N_w)
        correct = 0
        total = 0
        with torch.no_grad():
            for data in testloader:
Example #5
            loss = criterions[worker](preds, labels)
            loss.backward()
            break
    #4
    for worker in range(N_w):
        ps_functions.weight_dif(netsCurrent[worker], netsOLD[worker],
                                netsDif[worker])
    for worker in range(N_w):
        ps_functions.synch_weight(nets[worker], netsAvg[worker])  #5
        # draw random mixing weights for the N_n neighbors; note that with a standard
        # deviation of 0, np.random.normal(1, 0, N_n) returns all ones, so the mixing
        # below is effectively uniform
        rand = abs(np.random.normal(1, 0, N_n))
        normalizationFactor = sum(rand) / N_n
        for i in range(N_n):
            neighbor = int(connectionMatrix[worker][i])
            constant = (N_n + 1) * normalizationFactor / rand[i]
            ps_functions.weight_accumulate(netsDif[neighbor], nets[worker],
                                           constant)  #PWdif & 6
        ps_functions.weight_accumulate(netsDif[worker], nets[worker], N_n + 1)
        ps_functions.synch_weight(netsAvg[worker], nets[worker])  #7
        optimizers[worker].step()  #8
        ps_functions.synch_weight(netsOLD[worker], netsCurrent[worker])  #9
        ps_functions.synch_weight(netsCurrent[worker], nets[worker])  #10
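    # Assuming weight_accumulate adds src/scale, neighbor i enters with weight
    # rand[i] / ((N_n + 1) * normalizationFactor) and the worker's own difference
    # with weight 1 / (N_n + 1). The neighbor weights sum to N_n / (N_n + 1)
    # regardless of the draw, so the total mixing weight is always exactly 1.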

    if r % 100 == 0:
        ps_functions.initialize_zero(ps_model)
        for n in range(N_w):
            ps_functions.weight_accumulate(nets[n], ps_model, N_w)
        correct = 0
        total = 0
        with torch.no_grad():
            for data in testloader:
                images, labels = data
Example #6
            optimizers[worker].zero_grad()
            preds = nets[worker](inputs)
            loss = criterions[worker](preds, labels)
            loss.backward()
            break
    #4
    for worker in range(N_w):
        ps_functions.weight_dif(netsCurrent[worker], netsOLD[worker],
                                netsDif[worker])
    for worker in range(N_w):
        ps_functions.synch_weight(nets[worker], netsAvg[worker])  #5
        # deterministic variant of the mixing step: average the N_n + 1 difference
        # models at even ring distances, each with weight 1 / (N_n + 1)
        index = (worker - N_n) % N_w
        for i in range(N_n + 1):
            ps_functions.weight_accumulate(netsDif[(index + i * 2) % N_w],
                                           nets[worker], N_n + 1)  #PWdif & 6
        ps_functions.synch_weight(netsAvg[worker], nets[worker])  #7
        optimizers[worker].step()  #8
        ps_functions.synch_weight(netsOLD[worker], netsCurrent[worker])  #9
        ps_functions.synch_weight(netsCurrent[worker], nets[worker])  #10
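    # For example, with N_w = 10 and N_n = 4, worker 5 starts at index (5 - 4) % 10
    # = 1 and mixes the difference models of workers [1, 3, 5, 7, 9] (itself
    # included), i.e. every second worker on the ring, each weighted 1 / (N_n + 1).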


    if r % 100 == 0:
        ps_functions.initialize_zero(ps_model)
        for n in range(N_w):
            ps_functions.weight_accumulate(nets[n], ps_model, N_w)
        correct = 0
        total = 0
        with torch.no_grad():