Пример #1
0
def main():
    """Federated training entry point for three workers (virtual or websocket).

    Resamples the data via ``process_data``, makes a shuffled train/test
    split, federates the training set across the workers, and runs
    ``args.epochs`` epochs of train/test, optionally saving the model.
    """
    args = define_and_get_arguments()

    hook = sy.TorchHook(torch)

    # Branch here when using virtual workers (simulation)
    if args.use_virtual:
        alice = VirtualWorker(id="alice", hook=hook, verbose=args.verbose)
        bob = VirtualWorker(id="bob", hook=hook, verbose=args.verbose)
        charlie = VirtualWorker(id="charlie", hook=hook, verbose=args.verbose)
    # Branch here when using websocket workers
    else:
        # All three workers listen on the same port but different hosts.
        a_kwargs_websocket = {"host": "192.168.0.57", "hook": hook}
        b_kwargs_websocket = {"host": "192.168.0.58", "hook": hook}
        c_kwargs_websocket = {"host": "192.168.0.59", "hook": hook}

        baseport = 10002
        alice = WebsocketClientWorker(id="alice",
                                      port=baseport,
                                      **a_kwargs_websocket)
        bob = WebsocketClientWorker(id="bob",
                                    port=baseport,
                                    **b_kwargs_websocket)
        charlie = WebsocketClientWorker(id="charlie",
                                        port=baseport,
                                        **c_kwargs_websocket)

    # Collect the worker objects into a list
    workers = [alice, bob, charlie]

    # Whether to use CUDA
    use_cuda = args.cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {}

    # Fix the torch random seed for reproducibility
    torch.manual_seed(args.seed)

    labels_resampled_factorized, obs_resampled_with_noise_2 = process_data()

    # percentage of test/valid set to use for testing and validation from the test_valid_idx (to be called test_size)
    test_size = 0.1

    # obtain training indices that will be used for validation
    num_train = len(obs_resampled_with_noise_2)
    indices = list(range(num_train))
    np.random.shuffle(indices)
    split = int(np.floor(test_size * num_train))
    train_idx, test_idx = indices[split:], indices[:split]

    # Debug output for the selected training split.
    # NOTE(review): indexing with a list of ints implies numpy-array-like
    # data returned by process_data() — confirm against its implementation.
    print(type(obs_resampled_with_noise_2[train_idx]),
          type(labels_resampled_factorized[train_idx]))
    print(obs_resampled_with_noise_2[train_idx].shape,
          labels_resampled_factorized[train_idx].shape)
    print(labels_resampled_factorized[train_idx])
    federated_train_dataset = D.TensorDataset(
        torch.tensor(obs_resampled_with_noise_2[train_idx]),
        torch.tensor(labels_resampled_factorized[train_idx]))

    # Distribute the training dataset across the federated workers
    federated_train_loader = sy.FederatedDataLoader(
        federated_train_dataset.federate(tuple(workers)),
        batch_size=args.batch_size,
        shuffle=True,
        iter_per_worker=True,
        **kwargs,
    )

    test_dataset = D.TensorDataset(
        torch.tensor(obs_resampled_with_noise_2[test_idx]),
        torch.tensor(labels_resampled_factorized[test_idx]))

    test_loader = D.DataLoader(test_dataset,
                               shuffle=True,
                               batch_size=args.batch_size,
                               num_workers=0,
                               drop_last=True)

    model = Net(input_features=1, output_dim=5).to(device)
    criterion = nn.NLLLoss()

    for epoch in range(1, args.epochs + 1):
        logger.info("Starting epoch %s/%s", epoch, args.epochs)
        model = train(model,
                      device,
                      federated_train_loader,
                      args.lr,
                      args.federate_after_n_batches,
                      criterion=criterion)
        test(model,
             test_loader,
             args.batch_size,
             criterion=criterion,
             train_on_gpu=use_cuda)

    if args.save_model:
        torch.save(model.state_dict(), "./Model/mnist_cnn.pt")
Пример #2
0
async def main():
    """Asynchronous federated-averaging training loop over websocket workers.

    Connects to three training workers plus one "testing" worker, then runs
    ``args.training_rounds`` rounds of: concurrent fit on all workers,
    periodic evaluation, federated averaging, and learning-rate decay.
    """
    args = define_and_get_arguments()

    hook = sy.TorchHook(torch)

    if (args.localworkers):
        # ----------------------------- This is for localhost workers --------------------------------
        kwargs_websocket = {
            "hook": hook,
            "verbose": args.verbose,
            "host": "0.0.0.0"
        }
        alice = websocket_client.WebsocketClientWorker(id="alice",
                                                       port=8777,
                                                       **kwargs_websocket)
        bob = websocket_client.WebsocketClientWorker(id="bob",
                                                     port=8778,
                                                     **kwargs_websocket)
        charlie = websocket_client.WebsocketClientWorker(id="charlie",
                                                         port=8779,
                                                         **kwargs_websocket)
        testing = websocket_client.WebsocketClientWorker(id="testing",
                                                         port=8780,
                                                         **kwargs_websocket)
    else:
        # ----------------------------- This is for remote workers ------------------------------------
        kwargs_websocket_alice = {"host": "128.226.78.195", "hook": hook}
        alice = websocket_client.WebsocketClientWorker(
            id="alice", port=8777, **kwargs_websocket_alice)

        kwargs_websocket_bob = {"host": "128.226.77.222", "hook": hook}
        bob = websocket_client.WebsocketClientWorker(id="bob",
                                                     port=8777,
                                                     **kwargs_websocket_bob)

        kwargs_websocket_charlie = {"host": "128.226.88.120", "hook": hook}
        charlie = websocket_client.WebsocketClientWorker(
            id="charlie", port=8777, **kwargs_websocket_charlie)

        # kwargs_websocket_testing = {"host": "128.226.77.111", "hook": hook}
        kwargs_websocket_testing = {"host": "128.226.88.210", "hook": hook}
        testing = websocket_client.WebsocketClientWorker(
            id="testing", port=8777, **kwargs_websocket_testing)

    # Drop any objects left on the remote workers by previous runs
    for wcw in [alice, bob, charlie, testing]:
        wcw.clear_objects_remote()

    worker_instances = [alice, bob, charlie]

    use_cuda = args.cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    model = Net().to(device)
    # Resume from a previously saved model when one exists on disk
    if (os.path.isfile('mnist_cnn_asyn.pt')):
        model.load_state_dict(torch.load("mnist_cnn_asyn.pt"))
        model.eval()

    # Trace the model so it can be serialized and shipped to the workers
    traced_model = torch.jit.trace(
        model,
        torch.zeros([1, 1, 28, 28], dtype=torch.float).to(device))
    learning_rate = args.lr

    # Execute training and test rounds
    for curr_round in range(1, args.training_rounds + 1):
        logger.info("Training round %s/%s", curr_round, args.training_rounds)

        # Fit the current traced model on every worker concurrently
        results = await asyncio.gather(*[
            fit_model_on_worker(
                worker=worker,
                traced_model=traced_model,
                batch_size=args.batch_size,
                curr_round=curr_round,
                max_nr_batches=args.federate_after_n_batches,
                lr=learning_rate,
            ) for worker in worker_instances
        ])
        models = {}
        loss_values = {}

        # Evaluate on rounds 1, 11, 21, ... (curr_round % 10 == 1) and on the final round
        test_models = curr_round % 10 == 1 or curr_round == args.training_rounds
        if test_models:
            logger.info("Evaluating models")
            np.set_printoptions(formatter={"float": "{: .0f}".format})
            for worker_id, worker_model, _ in results:
                evaluate_model_on_worker(
                    model_identifier="Model update " + worker_id,
                    worker=testing,
                    dataset_key="mnist_testing",
                    model=worker_model,
                    nr_bins=10,
                    batch_size=128,
                    device=device,
                    print_target_hist=False,
                )

        # Federate models (note that this will also change the model in models[0])
        for worker_id, worker_model, worker_loss in results:
            if worker_model is not None:
                models[worker_id] = worker_model
                loss_values[worker_id] = worker_loss

        traced_model = utils.federated_avg(models)

        if test_models:
            evaluate_model_on_worker(
                model_identifier="Federated model",
                worker=testing,
                dataset_key="mnist_testing",
                model=traced_model,
                nr_bins=10,
                batch_size=128,
                device=device,
                print_target_hist=False,
            )
            # save intermediate model for this round
            model_dir = "models_asyn"
            if (not os.path.exists(model_dir)):
                os.makedirs(model_dir)
            model_name = "{}/mnist_cnn_{}.pt".format(model_dir, curr_round)
            torch.save(traced_model.state_dict(), model_name)

        # decay learning rate (x0.98 per round, floored at 1% of the initial lr)
        learning_rate = max(0.98 * learning_rate, args.lr * 0.01)

    if args.save_model:
        torch.save(traced_model.state_dict(), "mnist_cnn_asyn.pt")
Пример #3
0
import torch
import syft

from syft.workers.websocket_server import WebsocketServerWorker


class WebsocketServerWorkerGood(WebsocketServerWorker):
    """Websocket server worker that claims ownership of registered objects."""

    def set_obj(self, obj: object):
        """Store *obj* in this worker's object registry and mark this worker
        as its owner."""
        obj.owner = self
        self._objects[obj.id] = obj


# Hook PyTorch and start the websocket server worker
hook = syft.TorchHook(torch)
server_worker = WebsocketServerWorkerGood(id="good",
                                          host="localhost",
                                          port=8778,
                                          hook=hook)

# Register a tagged tensor so clients can discover it by searching for "test"
test_data = torch.tensor([1, 2, 3]).tag("test")
server_worker.set_obj(test_data)

print("Good server started.")
server_worker.start()  # NOTE(review): presumably blocks serving requests — confirm
Пример #4
0
def main():
    """Federated MNIST training over three workers (virtual or websocket).

    Training data is discovered on the workers via tag search rather than
    being pushed from here; testing uses a locally-loaded MNIST set.
    """
    args = define_and_get_arguments()
    print(args)
    hook = sy.TorchHook(torch)

    host = "localhost"

    if args.use_virtual:
        alice = VirtualWorker(id="alice", hook=hook, verbose=args.verbose)
        bob = VirtualWorker(id="bob", hook=hook, verbose=args.verbose)
        charlie = VirtualWorker(id="charlie", hook=hook, verbose=args.verbose)
    else:
        kwargs_websocket = {
            "host": host,
            "hook": hook,
            "verbose": args.verbose
        }
        alice = WebsocketClientWorker(id="alice",
                                      port=8771,
                                      **kwargs_websocket)
        bob = WebsocketClientWorker(id="bob", port=8772, **kwargs_websocket)
        charlie = WebsocketClientWorker(id="charlie",
                                        port=8773,
                                        **kwargs_websocket)

    workers = [alice, bob, charlie]

    use_cuda = args.cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {}

    # Search multiple times should still work
    tr_alice = alice.search("#mnist", "#alice", "#train_tag")
    tr_bob = bob.search("#mnist", "#bob", "#train_tag")
    tr_charlie = charlie.search("#mnist", "#charlie", "#train_tag")

    # NOTE(review): assumes each search returns (data, target) in that
    # order — confirm against the worker-side tagging code.
    base_data = []
    base_data.append(BaseDataset(tr_alice[0], tr_alice[1]))
    base_data.append(BaseDataset(tr_bob[0], tr_bob[1]))
    base_data.append(BaseDataset(tr_charlie[0], tr_charlie[1]))

    federated_train_loader = sy.FederatedDataLoader(
        FederatedDataset(base_data),
        batch_size=args.batch_size,
        shuffle=True,
        iter_per_worker=True,
        **kwargs,
    )

    # Local (non-federated) test loader
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            "../data",
            train=False,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307, ), (0.3081, ))
            ]),
        ),
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs,
    )

    model = Net().to(device)

    for epoch in range(1, args.epochs + 1):
        logger.info("Starting epoch %s/%s", epoch, args.epochs)
        model = train(model, device, federated_train_loader, args.lr,
                      args.federate_after_n_batches)
        test(model, device, test_loader)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
Пример #5
0
from grid import GridNetwork
from model import Model
import time
import sys

import syft as sy
import torch as th
import asyncio

hook = sy.TorchHook(th)  # extend torch tensors with PySyft methods

if __name__ == "__main__":
    # CLI: <node_id> <connect (0|1)> <destination>
    node_id = sys.argv[1]
    connect = int(sys.argv[2])
    destination = sys.argv[3]

    # args = {"max_size": None, "timeout": 444, "url": "ws://openmined-grid.herokuapp.com"}
    args = {"max_size": None, "timeout": 444, "url": "ws://34.89.48.186"}
    grid = GridNetwork(node_id, **args)
    grid.start()

    if connect:
        # Actively connect to the destination node
        node = grid.connect(destination)
    else:
        # NOTE(review): presumably waits for peer "bill" to connect — confirm
        time.sleep(10)
        node = grid._connection_handler.get("bill")

    # asyncio.run(node.send(b'Hello!'))

    # Tag and describe a tensor so peers can discover it via search
    x = th.tensor([1, 2, 3, 4, 5, 6, 7]).tag("#X",
                                             "#test").describe("My Little obj")
Пример #6
0
def hook():
    """Create and return a PySyft TorchHook bound to torch."""
    return syft.TorchHook(torch)
Пример #7
0
def main():
    """Federated MNIST training over three workers (virtual or websocket).

    Federates the downloaded MNIST training set across the workers and runs
    the usual train/test epoch loop.
    """
    args = define_and_get_arguments()
    hook = sy.TorchHook(torch)

    # Branch here when using virtual workers (simulation)
    if args.use_virtual:
        alice = VirtualWorker(id="alice", hook=hook, verbose=args.verbose)
        bob = VirtualWorker(id="bob", hook=hook, verbose=args.verbose)
        charlie = VirtualWorker(id="charlie", hook=hook, verbose=args.verbose)
        list_of_object = [alice, bob, charlie]
    # Branch here when using websocket workers
    else:
        base_port = 10002
        list_of_id = ["alice", "bob", "charlie"]
        list_of_ip = ["192.168.0.52", "192.168.0.53", "192.168.0.54"]
        list_of_object = []
        for index in range(len(list_of_id)):
            # NOTE(review): "websockest" is a typo for "websocket" (local name only)
            kwargs_websockest = {"id": list_of_id[index], "hook": hook}
            list_of_object.append(
                WebsocketClientWorker(host=list_of_ip[index],
                                      port=base_port,
                                      **kwargs_websockest))

    workers = list_of_object

    use_cuda = args.cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {}

    # Federate the MNIST training set across the workers
    federated_train_loader = sy.FederatedDataLoader(
        datasets.MNIST(
            "../data",
            train=True,
            download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307, ), (0.3081, ))
            ]),
        ).federate(tuple(workers)),
        batch_size=args.batch_size,
        shuffle=True,
        iter_per_worker=True,
        **kwargs,
    )

    # Local (non-federated) test loader
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            "../data",
            train=False,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307, ), (0.3081, ))
            ]),
        ),
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs,
    )

    model = Net().to(device)

    for epoch in range(1, args.epochs + 1):
        logger.info("Starting epoch %s/%s", epoch, args.epochs)
        model = train(model, device, federated_train_loader, args.lr,
                      args.federate_after_n_batches)
        test(model, device, test_loader)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
'''
Part 6: Federated Learning on MNIST using a CNN
- Federated Learning aims to build systems that learn on decentralized data, improving data privacy and ownership.

'''
# setup: imports, hook, and two virtual workers
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

import syft as sy  # import PySyft library
hook = sy.TorchHook(torch)  # hook PyTorch so tensors gain PySyft methods
bob = sy.VirtualWorker(hook, id = 'bob')  # first virtual worker
alice = sy.VirtualWorker(hook, id = 'alice')  # second virtual worker

class Arguments():
    """Container for the training hyper-parameters used in this example."""

    def __init__(self):
        self.batch_size = 64         # training mini-batch size
        self.test_batch_size = 1000  # evaluation batch size
        self.epochs = 10             # number of training epochs
        self.lr = 0.01               # SGD learning rate
        self.momentum = 0.5          # SGD momentum
        self.no_cuda = False         # set True to force CPU
        self.seed = 1                # torch RNG seed
        self.log_interval = 30       # batches between log lines
        self.save_model = False      # persist the model after training

args = Arguments()  # instantiate the hyper-parameter container
Пример #9
0
def experiment(num_workers, no_cuda):
    """Run one federated CIFAR10 experiment with ``num_workers`` clients.

    Builds one ``vgg11_SL()[0]`` model per client plus a shared
    ``vgg11_SL()[1]`` model, trains for ``args.epochs`` epochs, and
    returns the per-client memory-exchange counters.
    """

    # Creating num_workers clients
    clients = []
    hook = sy.TorchHook(torch)
    clients_mem = torch.zeros(num_workers)
    for i in range(num_workers):
        clients.append(sy.VirtualWorker(hook, id="c " + str(i)))

    # Initializing arguments, with GPU usage or not
    args = Arguments(no_cuda)

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    if use_cuda:
        # TODO Quickhack. Actually need to fix the problem moving the model to CUDA\n",
        torch.set_default_tensor_type(torch.cuda.FloatTensor)

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 0, 'pin_memory': False} if use_cuda else {}

    # Federated data loader
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    federated_train_loader = sy.FederatedDataLoader(  # <-- this is now a FederatedDataLoader
        datasets.CIFAR10('../data',
                         train=True,
                         download=True,
                         transform=transform).
        federate(
            clients
        ),  # <-- NEW: we distribute the dataset across all the workers, it's now a FederatedDataset
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs)

    test_loader = torch.utils.data.DataLoader(datasets.CIFAR10(
        '../data', train=False, transform=transform),
                                              batch_size=args.test_batch_size,
                                              shuffle=True,
                                              **kwargs)

    #creating the models for each client
    models, optimizers = [], []

    for i in range(num_workers):
        #print(i)
        models.append(vgg11_SL()[0].to(device))
        models[i] = models[i].send(clients[i])  # move each client model to its worker
        optimizers.append(
            optim.SGD(params=models[i].parameters(), lr=args.lr, momentum=0.9))

    # measure wall-clock training time
    start = time.time()
    model = vgg11_SL()[1].to(device)
    optimizer = optim.SGD(
        model.parameters(), lr=args.lr,
        momentum=0.9)  # TODO momentum is not supported at the moment

    for epoch in range(1, args.epochs + 1):
        train(args, model, device, federated_train_loader, optimizer, epoch,
              models, optimizers, clients_mem)
        test(args, model, device, test_loader, models)

    if (args.save_model):
        torch.save(model.state_dict(), "mnist_cnn.pt")

    end = time.time()
    print(end - start)
    print("Memory exchanged : ", clients_mem)
    return clients_mem
Пример #10
0
# NOTE(review): `model`, `Net` and `train_loader` are defined elsewhere in
# the notebook this snippet was extracted from — confirm before reuse.
model_params = list(model.parameters())

bobs_model = Net()
alices_model = Net()

import syft
import syft as sy
from syft.core import utils
import torch
import torch.nn.functional as F
import json
import random
from syft.core.frameworks.torch import utils as torch_utils
from torch.autograd import Variable

# Build a local hook plus two virtual workers, and make every worker aware
# of the others.
hook = sy.TorchHook(verbose=False)
me = hook.local_worker
bob = sy.VirtualWorker(id="bob", hook=hook, is_client_worker=False)
alice = sy.VirtualWorker(id="alice", hook=hook, is_client_worker=False)
me.is_client_worker = False

compute_nodes = [bob, alice]

me.add_workers([bob, alice])
bob.add_workers([me, alice])
alice.add_workers([me, bob])

# presumably one list per compute node (bob, alice) — verify against the
# code that fills it
remote_dataset = (list(), list())

for batch_idx, (data, target) in enumerate(train_loader):
    data = Variable(data)
Пример #11
0
    def __init__(self):
        """Set up two DCRNN models (an LA party and a Bay party) for
        federated training.

        Loads both YAML configs and graph data, builds loggers, data
        loaders and models, optionally resumes from a checkpointed epoch,
        and — depending on ``args`` — prepares PySyft SPDZ workers and/or
        differential-privacy noise parameters.
        """
        with open('data/dcrnn_la.yaml') as f_la, open(
                'data/dcrnn_bay.yaml') as f_bay:
            config_la = yaml.load(f_la, Loader=yaml.FullLoader)
            config_bay = yaml.load(f_bay, Loader=yaml.FullLoader)

        # Graph structure (sensor ids / adjacency matrix) for each party
        sensor_ids1, sensor_id_to_ind1, adj_mx_la = load_graph_data(
            config_la['data'].get('graph_pkl_filename'))
        sensor_ids2, sensor_id_to_ind2, adj_mx_bay = load_graph_data(
            config_bay['data'].get('graph_pkl_filename'))

        # Party A (LA) supplies the shared kwargs; party B (Bay) keeps its
        # own data/model kwargs with a "2" suffix.
        self._kwargs = config_la
        self._data_kwargs = config_la.get('data')
        self._model_kwargs = config_la.get('model')
        self._data_kwargs2 = config_bay.get('data')
        self._model_kwargs2 = config_bay.get('model')
        self._train_kwargs = config_la.get('train')

        self.max_grad_norm = self._train_kwargs.get('max_grad_norm', 1.)

        # logging.
        self._log_dir = self._get_log_dir(config_la)
        self._writer = SummaryWriter('runs/' + self._log_dir)

        log_level = self._kwargs.get('log_level', 'INFO')
        self._logger = utils.get_logger(self._log_dir,
                                        __name__,
                                        'info.log',
                                        level=log_level)

        # data set
        self._data = utils.load_dataset(**self._data_kwargs)
        self._data2 = utils.load_dataset(**self._data_kwargs2)
        self.standard_scaler = self._data['scaler']
        self.standard_scaler2 = self._data2['scaler']

        self._logger.info('Setting: {}'.format(args.setting))
        self._logger.info("Party A trn samples: {}".format(
            self._data['train_loader'].size))
        self._logger.info("Party A vld samples: {}".format(
            self._data['val_loader'].size))
        self._logger.info("Party A tst samples: {}".format(
            self._data['test_loader'].size))
        self._logger.info("Party B trn samples: {}".format(
            self._data2['train_loader'].size))
        self._logger.info("Party B vld samples: {}".format(
            self._data2['val_loader'].size))
        self._logger.info("Party B tst samples: {}".format(
            self._data2['test_loader'].size))

        self.num_nodes = int(self._model_kwargs.get('num_nodes', 1))
        self.num_nodes2 = int(self._model_kwargs2.get('num_nodes', 1))
        self._logger.info("num_nodes: {}".format(self.num_nodes))
        self._logger.info("num_nodes2: {}".format(self.num_nodes2))

        self.input_dim = int(self._model_kwargs.get('input_dim', 1))
        self.seq_len = int(
            self._model_kwargs.get('seq_len'))  # for the encoder
        self.output_dim = int(self._model_kwargs.get('output_dim', 1))
        self.use_curriculum_learning = bool(
            self._model_kwargs.get('use_curriculum_learning', False))
        self.horizon = int(self._model_kwargs.get('horizon',
                                                  1))  # for the decoder

        # setup model
        dcrnn_model = DCRNNModel(adj_mx_la, self._logger, **self._model_kwargs)
        dcrnn_model2 = DCRNNModel(adj_mx_bay, self._logger,
                                  **self._model_kwargs2)

        if torch.cuda.is_available():
            # dcrnn_model = nn.DataParallel(dcrnn_model)
            # dcrnn_model2 = nn.DataParallel(dcrnn_model2)
            self.dcrnn_model = dcrnn_model.cuda()
            self.dcrnn_model2 = dcrnn_model2.cuda()
        else:
            self.dcrnn_model = dcrnn_model
            self.dcrnn_model2 = dcrnn_model2
        self._logger.info("Models created")
        self._logger.info('Local epochs:' + str(args.local_epochs))

        # Resume from a checkpoint when the config names a starting epoch
        self._epoch_num = self._train_kwargs.get('epoch', 0)
        if self._epoch_num > 0:
            self.load_model(self._epoch_num)

        # use PySyft for SPDZ
        if args.setting == 'fedavg' and args.spdz:
            import syft as sy
            self._logger.info('Using SPDZ for FedAvg')
            hook = sy.TorchHook(torch)
            # Two computation parties plus one crypto provider
            self.party_workers = [
                sy.VirtualWorker(hook, id="party{:d}".format(i))
                for i in range(2)
            ]
            self.crypto = sy.VirtualWorker(hook, id="crypto")

        # DP
        if args.dp:

            # Temporarily silence stdout while the privacy accountant prints
            class HiddenPrints:
                def __enter__(self):
                    self._original_stdout = sys.stdout
                    sys.stdout = open(os.devnull, 'w')

                def __exit__(self, exc_type, exc_val, exc_tb):
                    sys.stdout.close()
                    sys.stdout = self._original_stdout

            def find_sigma(eps, batches_per_lot, dataset_size):
                """Solve (Newton's method) for the noise multiplier sigma
                whose DP-SGD epsilon equals ``args.epsilon``."""
                lotSize = batches_per_lot * args.batch_size  # L
                N = dataset_size
                delta = min(10**(-5), 1 / N)
                lotsPerEpoch = N / lotSize
                q = lotSize / N  # Sampling ratio
                T = args.epochs * lotsPerEpoch  # Total number of lots

                def compute_dp_sgd_wrapper(_sigma):
                    # Root of this function == sigma that achieves args.epsilon
                    with HiddenPrints():
                        return compute_dp_sgd_privacy.compute_dp_sgd_privacy(
                            n=N,
                            batch_size=lotSize,
                            noise_multiplier=_sigma,
                            epochs=args.epochs,
                            delta=delta)[0] - args.epsilon

                sigma = newton(compute_dp_sgd_wrapper, x0=0.5,
                               tol=1e-4)  # adjust x0 to avoid error
                with HiddenPrints():
                    actual_eps = compute_dp_sgd_privacy.compute_dp_sgd_privacy(
                        n=N,
                        batch_size=lotSize,
                        noise_multiplier=sigma,
                        epochs=args.epochs,
                        delta=delta)[0]
        #         print('Batches_per_lot={}, q={}, T={}, sigma={}'.format(batches_per_lot, q, T, sigma))
        #         print('actual epslion = {}'.format(actual_eps))
                return sigma

            self._logger.info('Epsilon: ' + str(args.epsilon))
            self._logger.info('Lotsize_scaler: ' + str(args.lotsize_scaler))
            # Lot sizes scale with sqrt(N) per party, scaled by the CLI flag
            lotsizes = [
                N**.5 * args.lotsize_scaler for N in [
                    self._data['train_loader'].size,
                    self._data2['train_loader'].size
                ]
            ]
            batches_per_lot_list = list(
                map(lambda lotsize: max(round(lotsize / args.batch_size), 1),
                    lotsizes))
            # Cap batches-per-lot at the number of batches actually available
            batches_per_lot_list = [
                min(bpl, loader_len)
                for bpl, loader_len in zip(batches_per_lot_list, [
                    self._data['train_loader'].num_batch,
                    self._data2['train_loader'].num_batch
                ])
            ]
            self._logger.info('Batches per lot: ' + str(batches_per_lot_list))
            sigma_list = [
                find_sigma(args.epsilon, bpl, N)
                for bpl, N in zip(batches_per_lot_list, [
                    self._data['train_loader'].size,
                    self._data2['train_loader'].size
                ])
            ]
            self._logger.info('Sigma: ' + str(sigma_list))

            for mod, bpl, sig in zip([self.dcrnn_model, self.dcrnn_model2],
                                     batches_per_lot_list, sigma_list):
                mod.batch_per_lot = bpl
                mod.sigma = sig

            # NOTE(review): the zip-loop above already set these four
            # attributes — the assignments below are redundant (harmless).
            self.dcrnn_model.batch_per_lot = batches_per_lot_list[0]
            self.dcrnn_model.sigma = sigma_list[0]
            self.dcrnn_model2.batch_per_lot = batches_per_lot_list[1]
            self.dcrnn_model2.sigma = sigma_list[1]

            self._lastNoiseShape = None
            self._noiseToAdd = None
Пример #12
0
def experiment(num_workers, no_cuda):
    """Run one federated MNIST experiment with ``num_workers`` clients.

    Builds one ``Net1`` model per client plus a shared ``Net2`` model,
    trains for ``args.epochs`` epochs, and returns the per-client
    memory-exchange counters.
    """

    # Creating num_workers clients
    clients = []
    hook = sy.TorchHook(torch)
    clients_mem = torch.zeros(num_workers)
    for i in range(num_workers):
        clients.append(sy.VirtualWorker(hook, id="c " + str(i)))

    # Initializing arguments, with GPU usage or not
    args = Arguments(no_cuda)

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    if use_cuda:
        # TODO Quickhack. Actually need to fix the problem moving the model to CUDA\n",
        torch.set_default_tensor_type(torch.cuda.FloatTensor)

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 0, 'pin_memory': False} if use_cuda else {}

    # Federated data loader
    federated_train_loader = sy.FederatedDataLoader(datasets.MNIST(
        '../data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])).federate(clients),
                                                    batch_size=args.batch_size,
                                                    shuffle=True,
                                                    **kwargs)

    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                              batch_size=args.test_batch_size,
                                              shuffle=True,
                                              **kwargs)

    #creating the models and optimizers for each client
    models, optimizers = [], []
    for i in range(num_workers):
        models.append(Net1().to(device))
        models[i] = models[i].send(clients[i])  # move each client model to its worker
        optimizers.append(optim.SGD(params=models[i].parameters(), lr=0.1))

    # measuring training time
    start = time.time()
    model = Net2().to(device)
    optimizer = optim.SGD(model.parameters(), lr=args.lr)

    for epoch in range(1, args.epochs + 1):
        train(args, model, device, federated_train_loader, optimizer, epoch,
              models, optimizers, clients_mem)
        test(args, model, device, test_loader, models)

    if (args.save_model):
        torch.save(model.state_dict(), "mnist_cnn.pt")

    end = time.time()
    print(end - start)
    # printing the memory exchanged by each client
    print("Memory exchanged : ", clients_mem)
    return clients_mem
Пример #13
0
import torch.nn as nn
import os
import syft
import torchvision

from utils import cf_calc
from torchvision import transforms as T
from torchnet import meter
from config import DefaultConfig
from torch.utils.data import DataLoader
from visdom import Visdom

opt = DefaultConfig()
# Visdom dashboard; also mirrors logged output to a file
vis = Visdom(env=opt.visname, log_to_filename='./logs/logging.txt')

# NOTE(review): `torch` itself is not imported in this excerpt (only
# torch.nn) — confirm the original file imports it.
hook = syft.TorchHook(torch)  # hook PyTorch with PySyft so it supports federated learning
os.environ['CUDA_VISIBLE_DEVICES'] = opt.Devices_ID  # select which GPU id(s) are visible
device = torch.device("cuda" if opt.use_gpu else "cpu")  # device used for model optimization
torch.manual_seed(opt.random_seed)


def train(**kwargs):
    """Training entry point; keyword arguments override the default config."""
    # Update the default configuration from command-line kwargs
    opt.parse(kwargs)

    # Define the model.
    # Only the single-GPU definition is shown here; the multi-GPU
    # (data-parallel) definition differs slightly.
    model = getattr(models,
                    opt.model)(**opt.model_setting[opt.model])  # pass the model's params in via a dict
    print(model)
Пример #14
0
# In[5]:

import syft
import syft as sy
from syft.core import utils
import torch
import torch.nn.functional as F
import json
import random
from syft.core.frameworks.torch import utils as torch_utils
from torch.autograd import Variable

#local_worker = sy.SocketWorker(id="local", port=2009, hook=None, is_client_worker=False)
# Local socket worker, hooked into torch (older PySyft SocketWorker API)
local_worker = sy.SocketWorker(id="local", port=2009, hook=None)
hook = sy.TorchHook(local_worker=local_worker, verbose=False)
me = hook.local_worker
me.hook = hook

# In[6]:

# Remote worker "alice".
# NOTE(review): presumably is_pointer=True marks this as a pointer to a
# remote worker rather than a locally-served one — confirm.
alice = sy.SocketWorker(id="alice",
                        hostname="100.65.100.179",
                        port=2006,
                        hook=hook,
                        is_pointer=True,
                        is_client_worker=False)
#alice = sy.SocketWorker(id="alice", hostname="172.31.33.80", port=2006, hook=hook, is_pointer=True, is_client_worker=False)

bob = sy.SocketWorker(id="bob",
                      hostname="100.65.100.179",
# remote.py
import syft as sy
import sys
import torch
import asyncio
from sklearn.preprocessing import StandardScaler

hook = sy.TorchHook(torch, verbose=True)  # add PySyft federated-learning support to torch tensors
torch.manual_seed(1)  # deterministic behavior on this worker

# Connection settings for this websocket server worker.
# NOTE(review): id/port suggest this file runs the "hospital_2" node — confirm
# they match the coordinator's expectations.
configs = {
    "id": "hospital_2",
    "host": "localhost",
    "hook": hook,
    "verbose": False,
    "port": 8084
}


async def show_all(worker):
    """Poll *worker* forever, printing its object store every two seconds.

    Never returns; intended to run as a background task alongside the
    worker's serving loop. Reads the (private) ``_objects`` mapping, so it
    is a debugging aid rather than a public API.
    """
    # Yield control to the event loop once before entering the polling loop.
    await asyncio.sleep(0)
    poll_interval = 2.0
    while True:
        print("Objects:", worker._objects)
        await asyncio.sleep(poll_interval)


from sklearn.datasets import load_breast_cancer

# Load sklearn's bundled breast-cancer dataset for this worker to serve.
data = load_breast_cancer()
# Standardise data for input into Neural Net
scaler = StandardScaler()  # NOTE(review): fit/transform is not visible in this chunk — presumably applied below
Пример #16
0
def main():
    """Federated MNIST training across three workers (virtual or websocket).

    Builds the workers from command-line arguments, federates the MNIST
    training split across them, trains for ``args.epochs`` epochs with
    periodic local evaluation, and optionally saves the final weights.
    """
    args = define_and_get_arguments()

    hook = sy.TorchHook(torch)

    if args.use_virtual:
        # In-process simulated workers — no network needed.
        alice = VirtualWorker(id="alice", hook=hook, verbose=args.verbose)
        bob = VirtualWorker(id="bob", hook=hook, verbose=args.verbose)
        charlie = VirtualWorker(id="charlie", hook=hook, verbose=args.verbose)
    else:
        # Remote workers listening on consecutive local ports.
        common_kwargs = {"host": "localhost", "hook": hook, "verbose": args.verbose}
        alice = WebsocketClientWorker(id="alice", port=8777, **common_kwargs)
        bob = WebsocketClientWorker(id="bob", port=8778, **common_kwargs)
        charlie = WebsocketClientWorker(id="charlie", port=8779, **common_kwargs)

    workers = [alice, bob, charlie]

    use_cuda = args.cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    loader_kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {}

    # Same normalization for train and test (MNIST mean/std).
    mnist_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, ))
    ])

    # The training split is distributed across the workers; one loader
    # iterates batches per worker.
    federated_train_loader = sy.FederatedDataLoader(
        datasets.MNIST(
            "../data",
            train=True,
            download=True,
            transform=mnist_transform,
        ).federate(tuple(workers)),
        batch_size=args.batch_size,
        shuffle=True,
        iter_per_worker=True,
        **loader_kwargs,
    )

    # Evaluation runs locally on the plain (non-federated) test split.
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST("../data", train=False, transform=mnist_transform),
        batch_size=args.test_batch_size,
        shuffle=True,
        **loader_kwargs,
    )

    model = Net().to(device)

    for epoch in range(1, args.epochs + 1):
        logger.info("Starting epoch %s/%s", epoch, args.epochs)
        model = train(model, device, federated_train_loader, args.lr,
                      args.federate_after_n_batches)
        test(model, device, test_loader)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
async def main():
    """Coordinate encrypted federated training across NodeClient workers.

    Builds a syft Plan model and loss, connects to the remote workers,
    then for each round: trains on every worker concurrently, sums the
    encrypted parameter updates, decrypts the aggregate, and writes the
    averaged parameters back into the local model.
    """
    hook = sy.TorchHook(torch)
    device = torch.device("cpu")
    model = Net()
    # Build the Plan by tracing it once on a dummy MNIST-shaped input.
    model.build(torch.zeros([1, 1, 28, 28], dtype=torch.float).to(device))

    @sy.func2plan()
    def loss_fn(pred, target):
        # NOTE(review): `nll_loss` is used bare here — presumably imported
        # directly from torch.nn.functional at the top of the file; confirm.
        return nll_loss(input=pred, target=target)

    # Trace/build the loss Plan with dummy predictions and targets.
    input_num = torch.randn(3, 5, requires_grad=True)
    target = torch.tensor([1, 0, 4])
    dummy_pred = F.log_softmax(input_num, dim=1)
    loss_fn.build(dummy_pred, target)

    # Training hyperparameters; learning_rate decays each round (see bottom).
    epoch_num = 21
    batch_size = 64
    lr = 0.1
    learning_rate = lr
    optimizer_args = {"lr" : lr}

    # alice = NodeClient(hook, "ws://172.16.179.20:6666" , id="alice")
    # bob = NodeClient(hook, "ws://172.16.179.21:6667" , id="bob")
    # charlie = NodeClient(hook, "ws://172.16.179.22:6668", id="charlie")
#     testing = NodeClient(hook, "ws://localhost:6669" , id="testing")

    # worker_list = [alice, bob, charlie]

    # Connect to workers flvm-2 .. flvm-7 (flvm_ip is defined elsewhere in this file).
    worker_list = []
    for i in range(2, 8):
        worker = NodeClient(hook, "ws://"+flvm_ip[i]+":6666" , id="flvm-"+str(i))
        worker_list.append(worker)

    grid = sy.PrivateGridNetwork(*worker_list)

    for epoch in range(epoch_num):

        # NOTE(review): rounds are logged 0-based ("0/21"); intentional?
        logger.info("Training round %s/%s", epoch, epoch_num)

        round_start_time = time.time()

        # Train on all workers concurrently; each returns
        # (worker_id, encrypted_params, loss, num_training_samples).
        results = await asyncio.gather(
            *[
                fit_model_on_worker(
                    worker=worker,
                    built_model=model,
                    built_loss_fn=loss_fn,
                    encrypters=worker_list,
                    batch_size=batch_size,
                    curr_round=epoch,
                    max_nr_batches=-1,
                    lr=0.1,
                )
                for worker in worker_list
            ]
        )

        local_train_end_time = time.time()
        print("[trace]", "AllWorkersTrainingTime", "duration", "COORD", local_train_end_time - round_start_time)

        # Collect per-worker results, skipping workers that returned nothing.
        enc_models = {}
        loss_values = {}
        data_amounts = {}
        total_data_amount = 0


        for worker_id, enc_params, worker_loss, num_of_training_data in results:
            if enc_params is not None:
                enc_models[worker_id] = enc_params
                loss_values[worker_id] = worker_loss
                data_amounts[worker_id] = num_of_training_data
                total_data_amount += num_of_training_data

        ## aggregation
        # Sum all workers' encrypted parameter lists into the first one
        # (in-place: dst_enc_model aliases enc_models_list[0]).
        nr_enc_models = len(enc_models)
        enc_models_list = list(enc_models.values())
        data_amounts_list = list(data_amounts.values()) ##
        dst_enc_model = enc_models_list[0]

        aggregation_start_time = time.time()
        with torch.no_grad():
            for i in range(len(dst_enc_model)):
                for j in range(1, nr_enc_models):
                    dst_enc_model[i] += enc_models_list[j][i]
        aggregation_end_time = time.time()
        print("[trace]", "AggregationTime", "duration", "COORD", aggregation_end_time - aggregation_start_time)


        ## decryption
        # Fetch each summed parameter, decode fixed precision, and divide by
        # the total sample count (weighted average), then write it into the
        # local model.
        new_params = []
        decryption_start_time = time.time()
        with torch.no_grad():
            for i in range(len(dst_enc_model)):
                decrypt_para = dst_enc_model[i].get()
                new_para = decrypt_para.float_precision()
                new_para = new_para / int(total_data_amount)
                # NOTE(review): assumes a syft Plan-style model whose
                # parameters() is indexable — a plain torch nn.Module returns
                # a generator here; confirm.
                model.parameters()[i].set_(new_para)

        round_end_time = time.time()
        print("[trace]", "DecryptionTime", "duration", "COORD", round_end_time - decryption_start_time)
        print("[trace]", "RoundTime", "duration", "COORD", round_end_time - round_start_time)

        ## FedAvg
#         nr_models = len(models)
#         model_list = list(models.values())
#         dst_model = model_list[0]
#         nr_params = len(dst_model.parameters())
#         with torch.no_grad():
#             for i in range(1, nr_models):
#                 src_model = model_list[i]
#                 src_params = src_model.parameters()
#                 dst_params = dst_model.parameters()
#                 for i in range(nr_params):
#                     dst_params[i].set_(src_params[i].data + dst_params[i].data)
#             for i in range(nr_params):
#                 dst_params[i].set_(dst_params[i].data * 1/total_data_amount)


#         if epoch%5 == 0 or epoch == 49:
#             evaluate_model_on_worker(
#                 model_identifier="Federated model",
#                 worker=testing,
#                 dataset_key="mnist_testing",
#                 model=model,
#                 built_loss_fn=loss_fn,
#                 nr_bins=10,
#                 batch_size=64,
#                 device=device,
#                 print_target_hist=False,
#             )

        # Drop stale remote pointers so next round re-sends fresh Plans.
        model.pointers = {}
        loss_fn.pointers = {}

        # decay learning rate
        learning_rate = max(0.98 * learning_rate, lr * 0.01)
async def main():
    """Asynchronous federated MNIST training over websocket workers.

    Connects to three training workers plus one evaluation worker, then for
    ``args.training_rounds`` rounds: trains the traced model on every worker
    concurrently, optionally evaluates the individual updates, averages them
    into a new federated model, and decays the learning rate. Optionally
    saves the final federated model.

    Fix vs. the original: the saved checkpoint is now the federated
    ``traced_model`` — the original saved ``model``, whose weights were
    never updated after tracing.
    """
    args = define_and_get_arguments()

    hook = sy.TorchHook(torch)

    # All workers share a host; they are distinguished by id and port.
    kwargs_websocket = {
        "hook": hook,
        "verbose": args.verbose,
        "host": "0.0.0.0"
    }
    alice = websocket_client.WebsocketClientWorker(id="alice",
                                                   port=8777,
                                                   **kwargs_websocket)
    bob = websocket_client.WebsocketClientWorker(id="bob",
                                                 port=8778,
                                                 **kwargs_websocket)
    charlie = websocket_client.WebsocketClientWorker(id="charlie",
                                                     port=8779,
                                                     **kwargs_websocket)
    testing = websocket_client.WebsocketClientWorker(id="testing",
                                                     port=8780,
                                                     **kwargs_websocket)

    # Drop tensors left over on the remote side from any previous run.
    for wcw in [alice, bob, charlie, testing]:
        wcw.clear_objects_remote()

    worker_instances = [alice, bob, charlie]

    use_cuda = args.cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    model = Net().to(device)

    # Tracing yields a serializable model that can be shipped to workers.
    traced_model = torch.jit.trace(
        model,
        torch.zeros([1, 1, 28, 28], dtype=torch.float).to(device))
    learning_rate = args.lr

    for curr_round in range(1, args.training_rounds + 1):
        logger.info("Training round %s/%s", curr_round, args.training_rounds)

        # Train concurrently on all workers; each returns (id, model, loss).
        results = await asyncio.gather(*[
            fit_model_on_worker(
                worker=worker,
                traced_model=traced_model,
                batch_size=args.batch_size,
                curr_round=curr_round,
                max_nr_batches=args.federate_after_n_batches,
                lr=learning_rate,
            ) for worker in worker_instances
        ])
        models = {}
        loss_values = {}

        # Evaluate on rounds 1, 11, 21, ... and on the final round.
        test_models = curr_round % 10 == 1 or curr_round == args.training_rounds
        if test_models:
            logger.info("Evaluating models")
            np.set_printoptions(formatter={"float": "{: .0f}".format})
            for worker_id, worker_model, _ in results:
                evaluate_model_on_worker(
                    model_identifier="Model update " + worker_id,
                    worker=testing,
                    dataset_key="mnist_testing",
                    model=worker_model,
                    nr_bins=10,
                    batch_size=128,
                    device=device,
                    print_target_hist=False,
                )

        # Federate models (note that this will also change the model in models[0]
        for worker_id, worker_model, worker_loss in results:
            if worker_model is not None:
                models[worker_id] = worker_model
                loss_values[worker_id] = worker_loss

        traced_model = utils.federated_avg(models)

        if test_models:
            evaluate_model_on_worker(
                model_identifier="Federated model",
                worker=testing,
                dataset_key="mnist_testing",
                model=traced_model,
                nr_bins=10,
                batch_size=128,
                device=device,
                print_target_hist=False,
            )

        # decay learning rate
        learning_rate = max(0.98 * learning_rate, args.lr * 0.01)

    if args.save_model:
        # Save the federated (averaged) model — `model` itself was never
        # trained after tracing.
        torch.save(traced_model.state_dict(), "mnist_cnn.pt")
def main():
    """Federated DQN training on CoinRun with PySyft virtual workers.

    CoinRun levels are hashed (mean pixel of the first frame) to one of
    ``num_workers`` virtual workers; each worker keeps its own replay buffer
    and trains a copy of the DQN on its own data, and the copies are merged
    into the central agent via a secure worker.

    Fixes over the original version:
    * episode statistics are updated for the worker whose level just
      finished *before* ``worker_level`` is reassigned to the next level's
      worker (the original credited the finished episode to the wrong
      worker and never reset the old worker's accumulators);
    * the model-averaging weight no longer overwrites the soft-target-update
      hyperparameter ``tau``.
    """
    # check and use GPU if available if not use CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device is {}".format(device))

    # arguments
    args = setup_utils.setup_and_load()
    print("Arguments are")
    print(args)

    # set seed (derived from wall-clock time, varied per MPI rank)
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    time_int = int(time.time())
    seed = time_int % 10000
    set_global_seeds(seed * 100 + rank)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    if Config.NUM_ENVS > 1:
        print("To do: add multi env support")
    nenvs = 1  #Config.NUM_ENVS
    env = utils.make_general_env(nenvs, seed=rank)

    # wrap env (not needed with Coinrun options)
    #env = dqn_utils.wrap_deepmind(env, clip_rewards=False, frame_stack=True, scale=False)
    action_size = env.action_space.n

    # set up pysyft workers
    num_workers = 2
    hook = sy.TorchHook(torch)
    worker_1 = sy.VirtualWorker(hook, id='worker_1')
    worker_2 = sy.VirtualWorker(hook, id='worker_2')
    secure_worker = sy.VirtualWorker(hook, id='secure_worker')
    worker_list = []
    worker_list.append(worker_1)
    worker_list.append(worker_2)

    # Training hyperparameters
    timesteps = 250000  #2000000 #1000#2000000  # run env for this many time steps
    hidden_size = 512  # side of hidden layer of FFNN that connects CNN to outputs
    is_dueling = True
    is_impala_net = False
    learning_rate = 0.0001  # learning rate of optimizer
    batch_size = 32  # size of batch trained on
    start_training_after = 10000  # start training NN after this many timesteps
    discount = 0.99  # discount future states by

    epsilon_start = 1.0  # epsilon greedy start value
    epsilon_min = 0.02  # epsilon greedy end value
    epsilon_decay_steps = timesteps * .5  # decay epsilon over this many timesteps
    epsilon_step = (epsilon_start - epsilon_min) / (
        epsilon_decay_steps)  # decrement epsilon by this amount every timestep

    update_target_every = 1  # update target network every this steps
    tau = 0.001  # soft target updating amount

    frame_skip = 4  #hold action for this many frames
    save_every = 10000  #timesteps to save model after
    train_every = 1  # number of times to train

    # create replay buffer (one per worker)
    replay_size = 50000  # size of replay buffer
    replay_buffer_list = []
    for i in range(num_workers):
        replay_buffer = dqn_utils.ReplayBuffer(max_size=replay_size)
        replay_buffer_list.append(replay_buffer)

    # create DQN Agent
    dqn_agent = dqn_utils.DQNAgent(action_size, hidden_size, learning_rate,
                                   is_dueling, is_impala_net)

    # create stats for every worker
    stats_every = 10  #10  # print stats every this many episodes
    stats_list = []  # store stats for each env init here
    for i in range(num_workers):
        temp_dict = {}
        temp_dict['episode'] = 0
        temp_dict['mean_reward_total'] = 0.
        temp_dict['mean_ep_length_total'] = 0.
        temp_dict['mean_reward_recent'] = 0.
        temp_dict['mean_ep_length_recent'] = 0.
        temp_dict['episode_loss'] = 0.
        temp_dict['episode_reward'] = 0.
        temp_dict['episode_length'] = 0.
        stats_list.append(temp_dict)

    # training loop
    epsilon = epsilon_start
    # take no_action on first step to get state
    # use state to tell which level
    # env.reset() does not produce and observation in CoinRun until an action is taken
    no_action = np.zeros((nenvs, ), dtype=np.int32)
    state_list, _, _, _ = env.step(no_action)

    # assign each level to a worker
    # coinrun doesn't have a way to tell the current level so take mean of first screen of level and use dictionary to assign levels
    # worker_level is used to tell which replay buffer to put data into (ie which worker is training)
    level_worker_dict = {}
    levels_assigned = 0

    def get_worker_level(state, lw_dict, la, nw):
        # Hash the level by the scaled mean pixel value of its first frame;
        # unseen levels are assigned round-robin across the workers.
        temp_key = int(1000 * np.mean(state))
        if temp_key not in lw_dict:
            la += 1
            lw_dict[temp_key] = la % nw
            print("Adding new key to level_worker_dict. current size is: {}".
                  format(len(lw_dict)))
            print(lw_dict)
        return lw_dict[temp_key], lw_dict, la

    worker_level, level_worker_dict, levels_assigned = get_worker_level(
        state_list[0], level_worker_dict, levels_assigned, num_workers)

    for ts in range(timesteps):
        # decay epsilon
        epsilon -= epsilon_step
        if epsilon < epsilon_min:
            epsilon = epsilon_min

        # select an action from the agent's policy
        action = dqn_agent.select_action(state_list[0].squeeze(axis=-1),
                                         epsilon, env, batch_size)

        # enter action into the env, holding it for frame_skip frames
        reward_frame_skip = 0.
        for _ in range(frame_skip):
            next_state_list, reward_list, done_list, _ = env.step(action)
            stats_list[worker_level]['episode_reward'] += reward_list[0]
            reward_frame_skip += reward_list[0]
            if done_list[0]:
                break
        done = done_list[0]
        stats_list[worker_level]['episode_length'] += 1

        # add experience to replay buffer of the worker owning this level
        replay_buffer_list[worker_level].add(
            (state_list[0].squeeze(axis=-1),
             next_state_list[0].squeeze(axis=-1), action, reward_frame_skip,
             float(done)))

        if done:
            # env.reset doesn't reset the coinrun env but does produce image of first frame, which we can use get the worker_level
            state_list = env.reset()

            # update stats for the episode that just finished *before*
            # reassigning worker_level, so reward/length are credited to the
            # worker that actually played the episode
            stats_list[worker_level]['episode'] += 1
            #overall averages
            stats_list[worker_level]['mean_reward_total'] = (stats_list[worker_level]['mean_reward_total'] * (
                        stats_list[worker_level]['episode'] - 1) + stats_list[worker_level]['episode_reward']) / \
                                                            stats_list[worker_level]['episode']
            stats_list[worker_level]['mean_ep_length_total'] = (stats_list[worker_level]['mean_ep_length_total'] * (
                        stats_list[worker_level]['episode'] - 1) + stats_list[worker_level]['episode_length']) / \
                                                               stats_list[worker_level]['episode']
            # keep running average of last stats_every episodes
            if stats_list[worker_level]['episode'] >= stats_every:
                temp_episodes_num = stats_every
            else:
                temp_episodes_num = stats_list[worker_level]['episode']
            stats_list[worker_level]['mean_reward_recent'] = (
                stats_list[worker_level]['mean_reward_recent'] *
                (temp_episodes_num - 1) +
                stats_list[worker_level]['episode_reward']) / temp_episodes_num
            stats_list[worker_level]['mean_ep_length_recent'] = (
                stats_list[worker_level]['mean_ep_length_recent'] *
                (temp_episodes_num - 1) +
                stats_list[worker_level]['episode_length']) / temp_episodes_num
            # reset episode stats
            stats_list[worker_level]['episode_reward'] = 0.
            stats_list[worker_level]['episode_length'] = 0

            # print stats
            if stats_list[worker_level]['episode'] % stats_every == 0:
                print(
                    'w: {}'.format(worker_level),
                    'epi: {}'.format(stats_list[worker_level]['episode']),
                    't: {}'.format(ts), 'r: {:.1f}'.format(
                        stats_list[worker_level]['mean_reward_total']),
                    'l: {:.1f}'.format(
                        stats_list[worker_level]['mean_ep_length_total']),
                    'r r: {:.1f}'.format(
                        stats_list[worker_level]['mean_reward_recent']),
                    'r l: {:.1f}'.format(
                        stats_list[worker_level]['mean_ep_length_recent']),
                    'eps: {:.2f}'.format(epsilon), 'loss: {:.1f}'.format(
                        stats_list[worker_level]['episode_loss']))

                stats_list[worker_level]['episode_loss'] = 0.

            # now map the next level to its worker
            worker_level, level_worker_dict, levels_assigned = get_worker_level(
                state_list[0], level_worker_dict, levels_assigned, num_workers)
        else:
            state_list = next_state_list

        if ts > start_training_after:
            # train the agent
            # typical DQN gather experiences and trains once every iteration
            # train_every can modify that to 'train_every' many times every 'train_every'th iteration
            # example: if train_every=10 then train 10 times every 10th iteration
            if ts % train_every == 0:
                # pysyft federated learning training
                # copy model to each worker
                # each worker trains on its own data from its own replay buffer
                # updated models from each worker sent to a secure worker who updates the new model
                worker_dqn_list = []
                worker_dqn_target_list = []
                worker_opt_list = []
                for i in range(num_workers):
                    worker_dqn_list.append(dqn_agent.train_net.copy().send(
                        worker_list[i]))
                    worker_dqn_target_list.append(
                        dqn_agent.target_net.copy().send(worker_list[i]))
                    worker_opt_list.append(
                        optim.Adam(params=worker_dqn_list[i].parameters(),
                                   lr=learning_rate))

                for i in range(num_workers):
                    for _ in range(train_every):
                        # sample a batch from the replay buffer
                        x0, x1, a, r, d = replay_buffer_list[i].sample(
                            batch_size)
                        # turn batches into tensors and attach to GPU if available
                        state_batch = torch.FloatTensor(x0).to(device)
                        state_batch = torch.unsqueeze(state_batch, dim=1)
                        next_state_batch = torch.FloatTensor(x1).to(device)
                        next_state_batch = torch.unsqueeze(next_state_batch,
                                                           dim=1)
                        action_batch = torch.LongTensor(a).to(device)
                        reward_batch = torch.FloatTensor(r).to(device)
                        done_batch = torch.FloatTensor(1. - d).to(device)

                        # send data to worker
                        worker_state_batch = state_batch.send(worker_list[i])
                        worker_next_state_batch = next_state_batch.send(
                            worker_list[i])
                        worker_action_batch = action_batch.send(worker_list[i])
                        worker_reward_batch = reward_batch.send(worker_list[i])
                        worker_done_batch = done_batch.send(worker_list[i])

                        train_q = worker_dqn_list[i](
                            worker_state_batch).gather(1, worker_action_batch)

                        with torch.no_grad():
                            # Double DQN: get argmax values from train network, use argmax in target network
                            train_argmax = worker_dqn_list[i](
                                worker_next_state_batch).max(1)[1].view(
                                    batch_size, 1)
                            target_net_q = worker_reward_batch + worker_done_batch * discount * \
                                            worker_dqn_target_list[i](worker_next_state_batch).gather(1, train_argmax)

                        # get loss between train q values and target q values
                        # DQN implementations typically use MSE loss or Huber loss (smooth_l1_loss is similar to Huber)
                        # loss_fn = nn.MSELoss()
                        # loss = loss_fn(train_q, target_net_q)
                        loss = F.smooth_l1_loss(train_q, target_net_q)

                        # optimize the parameters with the loss
                        worker_opt_list[i].zero_grad()
                        loss.backward()
                        for param in worker_dqn_list[i].parameters():
                            param.grad.data.clamp_(-1, 1)
                        worker_opt_list[i].step()
                        # get loss stats
                        #print("loss is {}".format(loss))
                        temp_loss = loss.get()
                        #print("loss get is {}".format(temp_loss))
                        stats_list[i]['episode_loss'] += temp_loss.detach(
                        ).cpu().numpy()

                    # move the worker trained model to secure worker for updating the centralized DQN
                    worker_dqn_list[i].move(secure_worker)
                    with torch.no_grad():
                        # first worker replaces centralized DQN parameters, then do keep a running average as each new worker's params are found
                        if i == 0:
                            dqn_agent.train_net.load_state_dict(
                                worker_dqn_list[i].get().state_dict())
                        else:
                            # running-average weight for merging this worker's
                            # model; a local name so the soft-target-update
                            # hyperparameter `tau` is not clobbered
                            avg_weight = 1. / (1 + i)
                            temp_net = worker_dqn_list[i].get()
                            for dqn_var, temp_var in zip(
                                    dqn_agent.train_net.parameters(),
                                    temp_net.parameters()):
                                dqn_var.data.copy_(
                                    (1. - avg_weight) * dqn_var.data +
                                    avg_weight * temp_var.data)

            # save the network
            if ts % save_every == 0:
                save_string = "saved_models/dqn_model_{}_{}.pt".format(
                    time_int, ts)
                torch.save(dqn_agent.train_net.state_dict(), save_string)
                stats_save_string = "saved_models/stats_{}_{}.pickle".format(
                    time_int, ts)
                with open(stats_save_string, 'wb') as handle:
                    pickle.dump(stats_list, handle)
            # update the target network
            dqn_agent.update_target_network_soft(ts, update_target_every, tau)

    print("save final model")
    save_string = "saved_models/dqn_model_{}_FINAL.pt".format(time_int)
    torch.save(dqn_agent.train_net.state_dict(), save_string)
    stats_save_string = "saved_models/stats_{}_FINAL.pickle".format(time_int)
    with open(stats_save_string, 'wb') as handle:
        pickle.dump(stats_list, handle)
Пример #20
0
def test_init():
    """Hooking torch should stamp every newly created tensor with an id and an owner."""
    hooked = syft.TorchHook(torch, verbose=True)
    probe = torch.Tensor()
    assert probe.id is not None
    assert probe.owner is not None
def experiment(no_cuda):
    """Run one split-learning MNIST experiment and report per-client memory.

    Builds three clients (virtual or websocket per Arguments), gives each a
    client-side Net1 model, trains a server-side Net2 against them for
    args.epochs epochs via the module-level train/test helpers, and returns
    a tensor holding the memory exchanged by each client.

    Args:
        no_cuda: forwarded to Arguments(); disables CUDA when truthy.

    Returns:
        clients_mem: torch tensor of per-client exchanged-memory counters
            (filled in by train/test, which are defined elsewhere in this file).
    """

    # Creating num_workers clients

    hook = sy.TorchHook(torch)



    # Initializing arguments, with GPU usage or not
    args = Arguments(no_cuda)

    if args.use_virtual:
        alice = VirtualWorker(id="alice", hook=hook, verbose=args.verbose)
        bob = VirtualWorker(id="bob", hook=hook, verbose=args.verbose)
        charlie = VirtualWorker(id="charlie", hook=hook, verbose=args.verbose)
    else:
        kwargs_websocket = {"host": "localhost", "hook": hook, "verbose": args.verbose}
        alice = WebsocketClientWorker(id="alice", port=8777, **kwargs_websocket)
        bob = WebsocketClientWorker(id="bob", port=8778, **kwargs_websocket)
        charlie = WebsocketClientWorker(id="charlie", port=8779, **kwargs_websocket)
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    if use_cuda:
    # TODO Quickhack. Actually need to fix the problem moving the model to CUDA\n",
        torch.set_default_tensor_type(torch.cuda.FloatTensor)

    torch.manual_seed(args.seed)

    clients = [alice, bob, charlie]
    # One counter per client; updated as a side effect by train/test.
    clients_mem = torch.zeros(len(clients))

    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 0, 'pin_memory': False} if use_cuda else {}


    # Federated data loader
    federated_train_loader = sy.FederatedDataLoader( # <-- this is now a FederatedDataLoader
      datasets.MNIST('../data', train=True, download=True,
                     transform=transforms.Compose([
                         transforms.ToTensor(),
                         transforms.Normalize((0.1307,), (0.3081,))
                     ]))
      .federate(clients), # <-- NEW: we distribute the dataset across all the workers, it's now a FederatedDataset
      batch_size=args.batch_size, shuffle=True, **kwargs)

    test_loader = torch.utils.data.DataLoader(
      datasets.MNIST('../data', train=False, transform=transforms.Compose([
                         transforms.ToTensor(),
                         transforms.Normalize((0.1307,), (0.3081,))
                     ])),
      batch_size=args.test_batch_size, shuffle=True, **kwargs)


    #creating the models for each client
    # Each client gets its own Net1 (sent to live on that client) plus its
    # own SGD optimizer over the remote parameters.
    models,optimizers = [], []
    #print(device)
    for i in range(len(clients)):
        #print(i)
        models.append(Net1().to(device))
        models[i] = models[i].send(clients[i])
        optimizers.append(optim.SGD(params=models[i].parameters(),lr=0.1))



    start = time.time()
    #%%time
    # Server-side half of the split model.
    model = Net2().to(device)
    optimizer = optim.SGD(model.parameters(), lr=args.lr) # TODO momentum is not supported at the moment

    for epoch in range(1, args.epochs + 1):
        train(args, model, device, federated_train_loader, optimizer, epoch, models, optimizers,clients_mem)
        test(args, model, device, test_loader, models)
        t = time.time()
        print(t-start)
    if (args.save_model):
        torch.save(model.state_dict(), "mnist_cnn.pt")

    end = time.time()
    print(end - start)
    print("Memory exchanged : ",clients_mem)
    return clients_mem
Пример #22
0
import torch
import syft as sy  # <-- NEW: import the Pysyft library
import random
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from visdom import Visdom
from datetime import datetime
hook = sy.TorchHook(
    torch
)  # <-- NEW: hook PyTorch ie add extra functionalities to support Federated Learning
import ComputePrivacy as Privacy  # Import self definition function to compute the privacy loss
import logging
import Datasets  # Import self definition function to load the federated datasets
import os
logger = logging.getLogger(__name__)
date = datetime.now().strftime('%Y-%m-%d %H:%M')
vis = Visdom(env='CELEBA_FixDP_C_Asyn_08_flat')


# Define parameters
class Arguments():
    def __init__(self):
        self.batch_size = 5  # Number of samples used of each user/device at each iteration.
        # If this value is less than 1, then it means the sampling ratio, else it means the mini-batch size
        self.lr = 0.01  # Learning rate
        self.grad_upper_bound = torch.tensor([1.])  # clipbound
        self.z = 0.8  # Noise parameter z in Gaussian noise N(0, (zS)^2) where S is sensitivity
        self.users_total = 800  # Total number of users/devices
        self.user_sel_prob = 0.02  # Probability for sampling users/devices at each iteration
optimizers = [bobs_optimizer,  alice_optimizer]


#from syft.core.hooks import TorchHook
#from syft.core.workers import VirtualWorker
import torch
import torch.nn as nn
from torch.autograd import Variable as Var
import torch.optim as optim

import syft as sy 


# this is our hook
hook = sy.TorchHook()
me = hook.local_worker
# The local worker acts as a server, not a client, so it keeps its objects.
me.is_client_worker = False 

# Two in-process virtual workers that will hold the distributed data.
bob = sy.VirtualWorker(id='bob',hook=hook, is_client_worker=False)
alice = sy.VirtualWorker(id='alice',hook=hook, is_client_worker=False)

# Make the workers aware of each other so they can exchange tensors.
#me.add_workers([bob, alice])
bob.add_workers([alice])
alice.add_workers([bob])

compute_nodes = [bob, alice]
train_distributed_dataset  = []

# Counter used to alternate batches between the compute nodes below.
i = 0
for batch_idx, (data,target) in enumerate(train_loader):
Пример #24
0
    # --- Remaining CLI arguments for the websocket server worker ---
    parser.add_argument(
        "--id",
        type=str,
        help="name (id) of the websocket server worker, e.g. --id alice")
    parser.add_argument(
        "--testing",
        action="store_true",
        help=
        "if set, websocket server worker will load the test dataset instead of the training dataset",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="if set, websocket server worker will be started in verbose mode",
    )

    args = parser.parse_args()

    # Hook and start server
    hook = sy.TorchHook(torch)
    server = start_websocket_server_worker(
        id=args.id,
        host=args.host,
        port=args.port,
        hook=hook,
        verbose=args.verbose,
        keep_labels=KEEP_LABELS_DICT[args.id],
        # BUG FIX: --testing means "serve the *test* dataset", so the worker
        # runs in training mode only when the flag is absent. The original
        # passed `args.testing` directly, inverting the documented behavior.
        training=not args.testing,
    )
Пример #25
0
async def main():
    """Run federated training over websocket workers.

    Reads an INI configuration file (path given as the single CLI argument),
    connects to every ``WORKER*`` section as a websocket client, then runs
    ``rounds`` of: fit the traced model on each participating worker in
    parallel, federated-average the returned models, and evaluate the new
    global model on every worker's train and test splits. Finally prints the
    per-client confusion-matrix history.
    """

    hook = sy.TorchHook(torch)
    parser = argparse.ArgumentParser(description='Train and validate a Federated model')
    parser.add_argument('config', type=str, help='Configuration file')
    args = parser.parse_args()
    config = configparser.ConfigParser()
    config.read(args.config)

    # Train configuration

    config_rounds = config.getint('TRAIN', 'rounds')
    config_epochs = config.getint('TRAIN', 'epochs')
    config_batch = config.getint('TRAIN', 'batch')
    config_optimizer = config.get('TRAIN', 'optimizer')
    config_lr = config.getfloat('TRAIN', 'lr')
    config_shuffle = config.getboolean('TRAIN', 'shuffle')

    # worker id -> connected client; worker id -> per-round result tuples.
    clients = {}
    clients_results = {}

    # One websocket client per [WORKER*] section of the config file.
    for section in config.sections():
        if section.startswith('WORKER'):
            kwargs_websocket = {'hook': hook, 'id': config.get(section, 'id'), 'host': config.get(section, 'host'),
                                'port': config.getint(section, 'port'),
                                'verbose': config.getboolean(section, 'verbose')}
            # Workers flagged as non-participants are still evaluated below,
            # but excluded from the training fan-out.
            federation_participant = config.getboolean(section, 'federation_participant')
            client = CustomWebsocketClientWorker(**kwargs_websocket)
            client.federation_participant = federation_participant
            # Drop any objects left on the remote worker from a previous run.
            client.clear_objects_remote()
            clients[kwargs_websocket['id']] = client
            clients_results[kwargs_websocket['id']] = []

    # Trace the model so it can be serialized and shipped to remote workers.
    model = Classifier()
    traced_model = trace(model, torch.zeros([1, 10], dtype=torch.float))

    for curr_round in range(config_rounds):

        print('Round %s/%s ¡Ding Ding!:' % (curr_round + 1, config_rounds))

        # Fit the current global model on all participating workers in
        # parallel; each coroutine returns (worker_id, updated_model).
        # NOTE(review): dataset_key='test' is used here for *training* —
        # looks like it should be 'train'; confirm against fit_model_on_worker.
        results = await asyncio.gather(
            *[
                fit_model_on_worker(
                    worker=clients[client],
                    traced_model=traced_model,
                    optimizer=config_optimizer,
                    batch_size=config_batch,
                    epochs=config_epochs,
                    lr=config_lr,
                    dataset_key='test',
                    shuffle=config_shuffle
                )
                for client in clients if clients[client].federation_participant
            ]
        )

        print('Training done!')

        # Average the returned models (skipping workers that returned None)
        # into the next round's global model.
        print('Federating model ... ', end='')
        models = {}
        for worker_id, worker_model in results:
            if worker_model is not None:
                models[worker_id] = worker_model
        traced_model = utils.federated_avg(models)
        print('Done!')

        # Evaluate the new global model on every worker (participant or not).
        for client in clients:
            # Evaluate train
            train_loss, train_confusion_matrix = evaluate_model_on_worker(
                worker=clients[client],
                dataset_key='train',
                model=traced_model,
                batch_size=config_batch,
            )
            # Evaluate test
            test_loss, test_confusion_matrix = evaluate_model_on_worker(
                worker=clients[client],
                dataset_key='test',
                model=traced_model,
                batch_size=config_batch,
            )

            clients_results[client].append((train_loss, test_loss, test_confusion_matrix))
            print('"%s" => Train loss: %.4f. Test loss: %.4f' % (client, train_loss, test_loss))

    print('Confusion matrices:')

    # Unpack the per-round (train_loss, test_loss, confusion_matrix) history
    # and plot/print it per client.
    for client in clients_results:
        print('Model "%s" stats:' % client)
        train_losses = [cr[0] for cr in clients_results[client]]
        test_losses = [cr[1] for cr in clients_results[client]]
        conf_matrices = [cr[2] for cr in clients_results[client]]
        show_results(conf_matrices, train_losses, test_losses, label=client, loss_xlabel='Round')