def get_accu_and_loss(ps, args):
    """Periodically evaluate the parameter server's weights on MNIST test data.

    Blocks until ``ps.get_begin`` returns a truthy value, then loops forever.
    Each pass fetches the current weights, queues them together with the
    elapsed time, sleeps one second and, if more than 5 seconds had elapsed
    when the snapshot was taken, evaluates the oldest queued snapshot and
    rewrites the results file.

    Args:
        ps: Handle whose ``get_begin`` and ``get_weights`` methods are
            invoked through ``.remote()`` and resolved with ``ray.get``.
        args: Settings object. ``save_dir``, ``num_workers`` and ``round``
            are read here; the whole object is passed to
            ``model.SimpleCNN``.

    The evaluation loop has no exit condition, so this function does not
    return normally.
    """
    net = model.SimpleCNN(args)
    mnist = model.download_mnist_retry(seed=1111)

    value = []
    cents = []

    # Poll (1 ms between attempts) until the start flag is raised.
    while not ray.get(ps.get_begin.remote()):
        time.sleep(0.001)

    start_time = time.time()
    while True:
        elapsed = time.time() - start_time
        cents.append((elapsed, ray.get(ps.get_weights.remote())))
        print('number of items in the cents', len(cents))
        time.sleep(1)
        if not elapsed > 5:
            continue

        # Evaluate the oldest snapshot still waiting in the queue.
        cent_time, cent = cents.pop(0)
        net.set_flat(cent)
        xs, xy = mnist.test.next_batch(10000)
        accu, loss = net.compute_accuracy_and_loss(xs, xy)
        print()
        print('centralized_time', cent_time, 'accu:', accu, 'loss:', loss)
        print()
        value.append((cent_time, accu, loss))

        # The whole history is re-saved on every evaluation.
        out_path = args.save_dir + 'centralized_num_worker%d, round %d' % (
            args.num_workers, args.round)
        np.save(out_path, np.array(value))
 def __init__(self, worker_index, batch_size=50):
     """Store the worker settings, load MNIST and build a network.

     Args:
         worker_index: Kept on the instance and also passed as ``seed``
             when reading the MNIST data sets.
         batch_size: Kept on the instance (defaults to 50).
     """
     self.worker_index, self.batch_size = worker_index, batch_size
     self.mnist = input_data.read_data_sets(
         "MNIST_data", one_hot=True, seed=worker_index)
     self.net = model.SimpleCNN()
Пример #3
0
def get_accu_and_loss(ps, args):
    """Evaluate the master weights on MNIST test data until ``args.stop_time``.

    About once per second the current master weights are fetched and queued
    with the elapsed time. Once more than 5 seconds have elapsed, each pass
    also evaluates the oldest queued snapshot on a 2000-sample test batch and
    rewrites the results file with the full history.

    Args:
        ps: Handle whose ``get_master_weight`` method is invoked through
            ``.remote()`` and resolved with ``ray.get``.
        args: Settings object. ``stop_time``, ``save_dir``, ``num_workers``,
            ``k``, ``round``, ``net_lrn`` and ``lrns[0]`` are read here; the
            whole object is passed to ``model.SimpleCNN``.
    """
    net = model.SimpleCNN(args)
    mnist = model.download_mnist_retry(seed=1111)
    start_time = time.time()
    value = []
    master_weights = []

    while True:
        current_time = time.time() - start_time
        if not current_time < args.stop_time:
            break

        snapshot = ray.get(ps.get_master_weight.remote())
        master_weights.append((current_time, snapshot))

        if current_time > 5:
            # Evaluate the oldest snapshot still waiting in the queue.
            t, w = master_weights.pop(0)
            net.set_flat(w)
            xs, xy = mnist.test.next_batch(2000)
            accu, loss = net.compute_accuracy_and_loss(xs, xy)
            print()
            print('master_time', t, 'accu:', accu, 'testing loss:', loss)
            print()
            value.append((t, accu, loss))

            out_path = (
                args.save_dir +
                'federated_num_worker%d, k_%d, round_%d, net_lrn_%.6f, FL_lrn_%6f'
                % (args.num_workers, args.k, args.round, args.net_lrn,
                   args.lrns[0]))
            np.save(out_path, np.array(value))

        time.sleep(1)
Пример #4
0
def worker_task(ps, worker_index, batch_size=50):
    """Endlessly pull weights, compute an update on one batch, and push it.

    Args:
        ps: Handle whose ``pull`` and ``push`` methods are invoked through
            ``.remote()``.
        worker_index: Passed as ``seed`` when downloading MNIST.
        batch_size: Number of training samples requested per step.

    The loop has no exit condition, so this function does not return
    normally.
    """
    # Per-worker MNIST handle, seeded with the worker index.
    mnist = model.download_mnist_retry(seed=worker_index)

    # Fresh model; only the first element of get_weights() is used, as the
    # key list for pull/push.
    net = model.SimpleCNN()
    keys = net.get_weights()[0]

    while True:
        # Synchronise the local model with the parameter server.
        latest = ray.get(ps.pull.remote(keys))
        net.set_weights(keys, latest)

        # One training batch -> one update, sent without waiting for a reply.
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        update = net.compute_update(batch_xs, batch_ys)
        ps.push.remote(keys, update)
def get_accu_and_loss(ps, args):
    """Evaluate the mean of all workers' weights on MNIST test data.

    Waits until no entry of ``ps.get_loss`` is ``None``, then, until
    ``args.stop_time`` seconds have elapsed, fetches every worker's weights
    about once per second, averages them along axis 0 and queues the result
    with the elapsed time. Once more than 5 seconds have elapsed, each pass
    also evaluates the oldest queued average on a 10000-sample test batch and
    rewrites the results file with the full history.

    Args:
        ps: Handle whose ``get_loss`` and ``get_weights_ids`` methods are
            invoked through ``.remote()`` and resolved with ``ray.get``.
        args: Settings object. ``stop_time``, ``num_workers``, ``save_dir``,
            ``k``, ``round``, ``net_lrn``, ``lrns[0]`` and ``a`` are read
            here; the whole object is passed to ``model.SimpleCNN``.
    """
    net = model.SimpleCNN(args)
    mnist = model.download_mnist_retry(seed=1111)

    # Training has not started until every loss slot has been filled in,
    # i.e. every worker has reported a loss.
    while None in ray.get(ps.get_loss.remote()):
        time.sleep(0.0001)
    print("begin")
    start_time = time.time()

    value = []
    cents = []
    while True:
        current_time = time.time() - start_time
        if not current_time < args.stop_time:
            break

        all_weights_ids = ray.get(ps.get_weights_ids.remote())
        per_worker = [
            ray.get(all_weights_ids[worker])
            for worker in range(args.num_workers)
        ]
        centroid = np.mean(np.array(per_worker), axis=0)
        cents.append((current_time, centroid))

        if current_time > 5:
            # Evaluate the oldest average still waiting in the queue.
            cent_time, cent = cents.pop(0)
            net.set_flat(cent)
            xs, xy = mnist.test.next_batch(10000)
            accu, loss = net.compute_accuracy_and_loss(xs, xy)
            print()
            print('cent_time', cent_time, 'accu:', accu, 'testing loss:', loss)
            print()
            value.append((cent_time, accu, loss))

            out_path = (
                args.save_dir +
                'flocking_num_worker%d, k_%d, round_%d, net_lrn_%.6f, node0_lrn_%6f, attraction_%.4f_center_v1'
                % (args.num_workers, args.k, args.round, args.net_lrn,
                   args.lrns[0], args.a))
            np.save(out_path, np.array(value))

        time.sleep(1)
Пример #6
0
def main(args):
    """Start a parameter server and workers, then report accuracy forever.

    Args:
        args: Settings object; only ``num_workers`` is read here.

    The reporting loop has no exit condition, so this function does not
    return normally.
    """
    # Seed the parameter server with the weights of a freshly built network.
    net = model.SimpleCNN()
    all_keys, all_values = net.get_weights()
    ps = ParameterServer.remote(all_keys, all_values)

    # Launch one training task per worker; the returned handles are kept in
    # a local list but never inspected.
    worker_tasks = [
        worker_task.remote(ps, worker) for worker in range(args.num_workers)
    ]

    # Download MNIST.
    mnist = model.download_mnist_retry()

    iteration = 0
    while True:
        # Load the server's current weights and score them on a test batch.
        net.set_weights(all_keys, ray.get(ps.pull.remote(all_keys)))
        test_xs, test_ys = mnist.test.next_batch(1000)
        accuracy = net.compute_accuracy(test_xs, test_ys)
        message = "Iteration {}: accuracy is {}".format(iteration, accuracy)
        print(message)
        iteration += 1
        time.sleep(1)
Пример #7
0
def worker_task(ps, current_worker_index, args):
    """Train on the shared master weights and push weight deltas back.

    Every step fetches the master weights, runs one ``net.minimize`` call on
    a training batch and sends ``new_weights - weights`` to
    ``ps.set_master_weight``. The loop ends after ``args.steps`` steps or
    once ``args.stop_time`` seconds have elapsed, whichever comes first.

    Args:
        ps: Handle whose ``get_master_weight`` and ``set_master_weight``
            methods are invoked through ``.remote()``.
        current_worker_index: Index into ``args.lrns``, ``args.batch_sizes``
            and ``args.time_per_batch``; ``index + 1`` seeds the MNIST
            download.
        args: Settings object. ``args.lrn`` is overwritten with this
            worker's learning rate before the network is built.
    """
    # Download MNIST.
    mnist = model.download_mnist_retry(seed=current_worker_index + 1)

    # Initialize the model with this worker's learning rate.
    args.lrn = args.lrns[current_worker_index]
    net = model.SimpleCNN(args)

    if current_worker_index == 1:
        # Worker 1 alone evaluates one extra training batch up front. The
        # results are not used afterwards, but the batch is consumed.
        xs, ys = mnist.train.next_batch(args.batch_sizes[current_worker_index])
        acc, first_loss = net.compute_accuracy_and_loss(xs, ys)
        stored_losses = [first_loss]

    step = 0
    start_time = time.time()
    pre_time = time.time()
    while step < args.steps and time.time() - start_time < args.stop_time:
        # Pad the step so that consecutive steps start at least
        # time_per_batch seconds apart.
        budget = args.time_per_batch[current_worker_index]
        spent = time.time() - pre_time
        time.sleep(max(0, budget - spent))
        pre_time = time.time()

        # Get the current weights from the parameter server.
        weights = ray.get(ps.get_master_weight.remote())
        net.set_flat(weights)

        # Compute an update and push it to the parameter server.
        xs, ys = mnist.train.next_batch(args.batch_sizes[current_worker_index])
        loss_value, new_weights = net.minimize(xs, ys)
        diff = new_weights - weights

        # Every worker except worker 0 waits an extra 0.1 s before pushing.
        if current_worker_index != 0:
            time.sleep(0.1)

        if step % 50 == 0:
            print("step", step, "current_worker_index", current_worker_index,
                  "elapsed time is",
                  time.time() - start_time, "loss is", loss_value)

        ps.set_master_weight.remote(diff)
        step += 1
 def __init__(self, worker_index, args):
     """Store the worker settings, fetch MNIST and build the network.

     Args:
         worker_index: Kept on the instance and also passed as ``seed``
             to ``model.download_mnist_retry``.
         args: Settings object; ``batch_size`` is copied onto the
             instance and the whole object is passed to
             ``model.SimpleCNN``.
     """
     self.worker_index, self.batch_size = worker_index, args.batch_size
     self.mnist = model.download_mnist_retry(seed=worker_index)
     self.net = model.SimpleCNN(args)
 def __init__(self, args):
     """Build the network from ``args`` and start with ``begin`` unset.

     Args:
         args: Settings object passed unchanged to ``model.SimpleCNN``.
     """
     network = model.SimpleCNN(args)
     self.net = network
     # Flag starts out False.
     self.begin = False
            print('centralized_time', cent_time, 'accu:', accu, 'loss:', loss)
            print()
            value.append((cent_time, accu, loss))
            np.save(
                args.save_dir + 'centralized_num_worker%d, round %d' %
                (args.num_workers, args.round), np.array(value))


if __name__ == "__main__":
    args = parser.parse_args()
    ray.init()

    # Results go to a directory named after the sleep_mean setting.
    args.save_dir = './centralized_log_%.1f/' % args.sleep_mean
    # exist_ok=True replaces the former "check, then create" expression,
    # which could fail if the directory appeared between the two calls.
    os.makedirs(args.save_dir, exist_ok=True)

    # Create a parameter server.
    net = model.SimpleCNN(args)
    ps = ParameterServer.remote(args)

    # Create workers.
    workers = [
        Worker.remote(worker_index, args)
        for worker_index in range(args.num_workers)
    ]

    # Download MNIST.
    mnist = model.download_mnist_retry()

    i = 0
    # Handle to the server's current weights (not resolved here).
    current_weights = ps.get_weights.remote()
    # Launch the evaluation task; its result handle is not kept.
    get_accu_and_loss.remote(ps, args)
    start_t = time.time()
Пример #11
0
#test_num = 100

args = sys.argv
img_file = args[1]

# Set up the input data with numpy: scale pixel values by 1/255, move the
# channel axis to the front and reshape to (-1, 3, 128, 128).
train_img = cv2.imread(img_file) / 255.0
train_img = np.transpose(train_img, (2, 0, 1))
x_np = np.reshape(train_img, (-1, 3, 128, 128))

# Convert the numpy array into a tensor that PyTorch can work with.
x = torch.from_numpy(x_np).float()

# Instantiate the model defined in model.py and load its parameters.
# net = model.SimpleMLP()
net = model.SimpleCNN()
net.load_state_dict(
    torch.load("learning_result/parameters_epoch29",
               map_location=torch.device('cpu')))
if gpu_available:
    device = "cuda:" + str(gpu_number)
    net = net.to(device)
    # The input has to live on the same device as the model's parameters,
    # otherwise the forward pass below fails with a device mismatch.
    x = x.to(device)
    print("cuda available")

y = net(x)

# Copy the result back to host memory first: .numpy() cannot be called on a
# CUDA tensor. On a CPU tensor .cpu() is a no-op.
y = y.detach().cpu().numpy()
y = np.reshape(y, (3, 128, 128))
y = np.transpose(y, (1, 2, 0))
# Clamp to [0, 1] before scaling to 8-bit values.
y = np.fmin(y, 1)
y = np.fmax(y, 0)
y = (y * 255).astype(np.uint8)
Пример #12
0
 def __init__(self, learning_rate):
     """Build the network with the given learning rate.

     Args:
         learning_rate: Forwarded to ``model.SimpleCNN`` as the
             ``learning_rate`` keyword argument.
     """
     network = model.SimpleCNN(learning_rate=learning_rate)
     self.net = network
def worker_task(ps, current_worker_index, args):
    """Run one flocking worker until the step or time budget is exhausted.

    After a single warm-up ``net.minimize`` call, the worker loads the
    weights the parameter server holds for it, reports its loss and waits
    until no entry of ``ps.get_loss`` is ``None``. Each training step then:

    1. runs ``net.minimize`` on one batch and reports the loss,
    2. subtracts ``args.lrn`` times the flocking potential from the flat
       weights, where the potential is ``args.a`` times the sum of
       ``weights - w`` over the weights ``w`` of the worker's group,
    3. publishes the resulting weights through ``ps.set_weights_ids``,
    4. saves ``[elapsed_time, weights]`` to disk whenever the next scheduled
       save time has passed.

    Args:
        ps: Handle whose ``get_weights_ids``, ``set_weights_ids``,
            ``get_loss``, ``set_loss`` and ``get_graph`` methods are invoked
            through ``.remote()``.
        current_worker_index: Index into ``args.lrns``, ``args.batch_size``,
            ``args.time_per_batch``, the weight-id list and the graph;
            ``index + 1`` seeds the MNIST download.
        args: Settings object. ``args.lrn`` is overwritten with this
            worker's learning rate before the network is built.
    """
    mnist = model.download_mnist_retry(seed=current_worker_index + 1)

    # Initialize the model with this worker's learning rate.
    args.lrn = args.lrns[current_worker_index]
    net = model.SimpleCNN(args)
    xs, ys = mnist.train.next_batch(args.batch_size[current_worker_index])
    loss_value, _ = net.minimize(xs, ys)

    # Overwrite the local weights with the ones the parameter server holds
    # for this worker, then report the warm-up loss.
    all_weights_ids = ray.get(ps.get_weights_ids.remote())
    new_weights = ray.get(all_weights_ids[current_worker_index])
    net.set_flat(new_weights)
    ps.set_loss.remote(current_worker_index, loss_value)

    # Before training starts, wait until every loss value is set, which
    # means all workers are ready.
    while True:
        losses = ray.get(ps.get_loss.remote())
        if None not in losses:
            print("begin")
            break
        time.sleep(0.0001)

    flocking_group = ray.get(ps.get_graph.remote())[current_worker_index]
    step = 0

    def get_flocking_potential(weights):
        """Return ``args.a`` times the summed ``weights - w`` over the group."""
        all_weights_ids = ray.get(ps.get_weights_ids.remote())
        flocking_dis = [
            weights - ray.get(all_weights_ids[fw]) for fw in flocking_group
        ]
        return np.sum(np.array(flocking_dis), axis=0) * args.a

    # Create the output directory once rather than on every training step.
    weight_dir = args.save_dir + "saved_weight/"
    os.makedirs(weight_dir, exist_ok=True)

    start_time = time.time()
    pre_time = time.time()
    next_weight_save_time = start_time
    while step < args.steps and time.time() - start_time < args.stop_time:
        # Pad the step so that consecutive steps start at least
        # time_per_batch seconds apart.
        time.sleep(
            max(
                0, args.time_per_batch[current_worker_index] -
                (time.time() - pre_time)))
        pre_time = time.time()
        xs, ys = mnist.train.next_batch(args.batch_size[current_worker_index])

        loss_value, new_weights = net.minimize(xs, ys)
        ps.set_loss.remote(current_worker_index, loss_value)

        # The potential is computed from the weights returned by minimize(),
        # but applied to the flat weights read back from the network.
        weights = new_weights
        f_p = get_flocking_potential(weights)
        new_weights = net.get_flat()
        new_weights -= args.lrn * f_p
        net.set_flat(new_weights)

        # Publish the new weights; the object id is passed wrapped in a list.
        weights_id = ray.put(new_weights)
        ps.set_weights_ids.remote(current_worker_index, [weights_id])
        step += 1

        if step % 100 == 1:
            print('step', step, 'current_worker_index', current_worker_index,
                  'elapsed_time',
                  time.time() - start_time, 'training loss is', loss_value)

        if time.time() > next_weight_save_time:
            saved_weight = [time.time() - start_time, new_weights]
            # A [float, ndarray] pair is ragged. NumPy 1.24+ raises
            # ValueError for such input unless dtype=object is explicit.
            np.save(
                weight_dir +
                'flocking_num_worker%d, k_%d, round_%d, net_lrn_%.6f, node0_lrn_%6f, attraction_%.4f_worker_%d_time_%.2f'
                % (args.num_workers, args.k, args.round,
                   args.net_lrn, args.lrns[0], args.a, current_worker_index,
                   time.time() - start_time),
                np.array(saved_weight, dtype=object))
            next_weight_save_time = time.time() + get_sleep_time(time.time() -
                                                                 start_time)
    # Driver setup: configure per-worker settings, build the shared initial
    # state, then launch the parameter server, the workers and the evaluator.
    # One CPU per worker plus one extra (presumably for the evaluation task
    # -- TODO confirm).
    ray.init(num_cpus=args.num_workers + 1)
    args.save_dir = './network_regularized_log/'
    # Every worker gets a per-batch time of 1, except worker 0 (0.125).
    args.time_per_batch = {i: 1 for i in range(args.num_workers)}
    args.time_per_batch[0] = 0.125
    # All workers start with the same learning rate, args.net_lrn.
    args.lrns = {i: args.net_lrn for i in range(args.num_workers)}
    # if args.personal_lrn:
    #     args.lrns[0] = args.net_lrn/8
    print(f'learning rates are {args.lrns}')

    # Every worker uses a batch size of 64, except worker 0 (1).
    args.batch_size = {i: 64 for i in range(args.num_workers)}
    args.batch_size[0] = 1
    os.makedirs(args.save_dir) if not os.path.exists(args.save_dir) else None

    # NOTE: tmp_args is the same object as args (no copy is made), so the
    # assignment below also sets args.lrn.
    tmp_args = args
    tmp_args.lrn = args.net_lrn
    net = model.SimpleCNN(tmp_args)
    init_weight = net.get_flat()
    print('\n\n shape is', init_weight.shape, '\n\n')
    # Every worker starts from the same initial flat weights; each entry is
    # put into the Ray object store separately.
    weights = [init_weight for _ in range(args.num_workers)]
    weights_ids = [ray.put(w) for w in weights]
    # Build the worker graph from num_workers, k and graph_seed.
    graph = construct_graph_watts(args.num_workers,
                                  args.k,
                                  seed=args.graph_seed)
    print(graph)
    ps = ParameterServer.remote(num_workers=args.num_workers,
                                weights_ids=weights_ids,
                                graph=graph)
    # Launch one training task per worker, then the evaluation task.
    worker_tasks = [
        worker_task.remote(ps, i, args) for i in range(args.num_workers)
    ]
    get_accu_and_loss.remote(ps, args)