def __connect_with_node(self, node_id, node_url):
    """Return a worker client for *node_id*, reusing a cached connection when one exists.

    :param node_id: identifier of the remote node
    :param node_url: address used only when no connection exists yet
    :return: a connected NodeClient for the node
    """
    known = self.hook.local_worker._known_workers
    if node_id in known:
        # Already connected to this node before: reuse the cached worker
        # and re-open its connection.
        worker = known[node_id]
        worker.connect()
    else:
        worker = NodeClient(self.hook, node_url, credential=self.credential)
    return worker
def main():
    """Build a Plan model and loss, connect to one grid worker, and repeatedly
    push the model to all workers in parallel via gevent, timing each round.
    """
    hook = sy.TorchHook(torch)
    device = torch.device("cpu")
    model = Net()
    # Trace the Plan with a dummy MNIST-shaped batch (1x1x28x28).
    model.build(torch.zeros([1, 1, 28, 28], dtype=torch.float).to(device))
    # model.build(torch.zeros([1, node_num], dtype=torch.float).to(device))

    @sy.func2plan()
    def loss_fn(pred, target):
        return nll_loss(input=pred, target=target)

    # Build the loss Plan with dummy log-softmax predictions and labels.
    input_num = torch.randn(3, 5, requires_grad=True)
    target = torch.tensor([1, 0, 4])
    dummy_pred = F.log_softmax(input_num, dim=1)
    loss_fn.build(dummy_pred, target)
    built_model = model
    built_loss_fn = loss_fn
    epoch_num = 21
    batch_size = 64
    lr = 0.1
    learning_rate = lr
    optimizer_args = {"lr": lr}
    alice = NodeClient(hook, "ws://10.0.17.6:6666", id="alice")
    # bob = NodeClient(hook, "ws://172.16.179.21:6667" , id="bob")
    # charlie = NodeClient(hook, "ws://172.16.179.22:6668", id="charlie")
    worker_list = [alice]
    # worker_list = [alice]
    grid = sy.PrivateGridNetwork(*worker_list)
    for epoch in range(epoch_num):
        logger.info("round %s/%s", epoch, epoch_num)
        epoch_start = time.time()
        # Fan out the model send to every worker concurrently with gevent.
        # NOTE(review): send_model_to_worker is defined elsewhere — presumably
        # it serializes and ships built_model to the worker; confirm.
        jobs = [
            gevent.spawn(send_model_to_worker, worker, built_model)
            for worker in worker_list
        ]
        gevent.joinall(jobs)
        # results = await asyncio.gather(
        #     *[
        #         send_model_to_worker(
        #             worker=worker,
        #             built_model=built_model,
        #         )
        #         for worker in worker_list
        #     ]
        # )
        print("[PROF]", "AllWorkerSend", "duration", "COORD", time.time() - epoch_start)
        # Drop stale pointers so the next round re-sends clean copies.
        built_model.pointers = {}
        built_loss_fn.pointers = {}
def move_current_data_to_training(app):
    """
    Read all current sensor data from the db, pre-process it for model
    training and send it to the local worker.

    :param app: The flask app context for accessing the db
    :return: None
    """
    with app.app_context():
        # Load every stored SensorData row into a pandas frame.
        frame = pd.read_sql(db.session.query(SensorData).statement, db.session.bind)

        # Pre-processing for training: attach RUL labels, prune columns,
        # window the time series, and clip the label range.
        frame = data_helper.add_rul_to_train_data(frame)
        data_helper.drop_unnecessary_columns(frame)
        features, labels = data_helper.transform_to_windowed_data(frame, with_labels=True)
        labels = data_helper.clip_rul(labels)

        # Convert to torch tensors and tag/describe them so they can be
        # discovered by searches within the grid.
        x_tensor = torch.Tensor(features).tag("#X", "#turbofan", "#dataset").describe(
            "The input datapoints to the turbofan dataset.")
        y_tensor = torch.Tensor(labels).tag("#Y", "#turbofan", "#dataset").describe(
            "The input labels to the turbofan dataset.")

        # Ship both tensors to the configured grid node and keep the pointers.
        grid_node = NodeClient(hook, address="ws://{}".format(config_helper.grid_node_address))
        shared_data.append(x_tensor.send(grid_node))
        shared_labels.append(y_tensor.send(grid_node))

    # The raw rows have been handed off to the grid; clear them from the db.
    with app.app_context():
        delete_sensor_data()

    return None
def connect_grid_nodes(message: dict) -> str:
    """Connect remote grid nodes between each other.

    Args:
        message (dict): Dict data structure containing node_id, node address
            and user credentials (optional).
    Returns:
        response (str): JSON-encoded status message.
    """
    # If found any credential
    credentials = message.get("auth")
    if credentials:
        credentials = AccountCredential(
            username=credentials["username"], password=credentials["password"]
        )

    # Only open a new connection when this node id is not known yet.
    # NodeClient registers itself with local_worker as a side effect, so the
    # instance does not need to be kept (fix: removed unused local `worker`).
    if message["id"] not in local_worker._known_workers:
        NodeClient(
            hook, address=message["address"], id=message["id"], credential=credentials
        )

    # fix: corrected "Succesfully" -> "Successfully" in the response message.
    # NOTE(review): if any caller string-matches the old misspelled status,
    # it must be updated in step — verify before deploying.
    return json.dumps({"status": "Successfully connected."})
async def main():
    """Distribute a ModelConfig to 12 workers, have every worker secret-share
    its model parameters with all others concurrently, then sum the encrypted
    parameters layer by layer, printing timing traces throughout.
    """
    hook = sy.TorchHook(torch)
    device = torch.device("cpu")
    optimizer = "SGD"
    epochs = 1
    shuffle = True
    model = Net()
    # Trace the Plan with a dummy MNIST-shaped batch (1x1x28x28).
    model.build(torch.zeros([1, 1, 28, 28], dtype=torch.float).to(device))
    # model.build(torch.zeros([2], dtype=torch.float).to(device))

    @sy.func2plan(args_shape=[(-1, 1), (-1, 1)])
    def loss_fn(target, pred):
        # Mean squared error; target is reshaped to match pred.
        return ((target.view(pred.shape).float() - pred.float())**2).mean()

    batch_size = 64
    lr = 0.1
    learning_rate = lr
    optimizer_args = {"lr": lr}
    model_config = sy.ModelConfig(model=model,
                                  loss_fn=loss_fn,
                                  optimizer=optimizer,
                                  batch_size=batch_size,
                                  optimizer_args=optimizer_args,
                                  epochs=epochs,
                                  shuffle=shuffle)
    # alice = NodeClient(hook, "ws://172.16.179.20:6666" , id="alice")
    # bob = NodeClient(hook, "ws://172.16.179.21:6667" , id="bob")
    # charlie = NodeClient(hook, "ws://172.16.179.22:6668", id="charlie")
    # testing = NodeClient(hook, "ws://localhost:6669" , id="testing")
    # worker_list = [alice, bob, charlie]
    worker_list = []
    # Connect to 12 workers, flvm-2 .. flvm-13, addressed via flvm_ip.
    # NOTE(review): flvm_ip is defined elsewhere — presumably an index->IP map;
    # confirm indices 2..13 exist.
    for i in range(2, 2 + 12):
        worker = NodeClient(hook, "ws://" + flvm_ip[i] + ":6666", id="flvm-" + str(i))
        worker_list.append(worker)
    for worker in worker_list:
        model_config.send(worker)
    grid = sy.PrivateGridNetwork(*worker_list)
    # One return id ("p0", "p1", ...) per model parameter tensor.
    num_of_parameters = len(model.parameters())
    return_ids = []
    for i in range(num_of_parameters):
        return_ids.append("p" + str(i))
    start = time.time()
    # worker_0 = worker_list[0]
    # worker_1 = worker_list[1]
    # worker_2 = worker_list[2]
    # Every worker shares its model among all workers, concurrently.
    enc_results = await asyncio.gather(*[
        worker.async_model_share(worker_list, return_ids=return_ids)
        for worker in worker_list
    ])
    end = time.time()
    ## aggregation
    # Accumulate every other worker's encrypted parameters into the first
    # worker's share list, in place.
    dst_enc_model = enc_results[0]
    agg_start = time.time()
    # NOTE(review): loop nesting below was reconstructed from a collapsed
    # source line — verify the print placement against the original.
    with torch.no_grad():
        for i in range(len(dst_enc_model)):
            layer_start = time.time()
            for j in range(1, len(enc_results)):
                add_start = time.time()
                dst_enc_model[i] += enc_results[j][i]
                print("[PROF]", "AddParams", time.time() - add_start)
            print("[PROF]", "Layer" + str(i), time.time() - layer_start)
    print("[PROF]", "AggTime", time.time() - agg_start)
async def main():
    """Build a Plan model and loss, connect to one grid worker (optionally
    over TLS), and repeatedly send the model to each worker, timing each send
    and decaying the learning rate per round.
    """
    hook = sy.TorchHook(torch)
    device = torch.device("cpu")
    model = Net()
    # Trace the Plan with a dummy MNIST-shaped batch (1x1x28x28).
    model.build(torch.zeros([1, 1, 28, 28], dtype=torch.float).to(device))
    # model.build(torch.zeros([1, node_num], dtype=torch.float).to(device))

    @sy.func2plan()
    def loss_fn(pred, target):
        return nll_loss(input=pred, target=target)

    # Build the loss Plan with dummy log-softmax predictions and labels.
    input_num = torch.randn(3, 5, requires_grad=True)
    target = torch.tensor([1, 0, 4])
    dummy_pred = F.log_softmax(input_num, dim=1)
    loss_fn.build(dummy_pred, target)
    built_model = model
    built_loss_fn = loss_fn
    epoch_num = 21
    batch_size = 64
    lr = 0.1
    learning_rate = lr
    optimizer_args = {"lr": lr}
    # Choose plain or TLS websocket based on the ssl_args flag
    # (ssl_args is defined elsewhere in this file — presumably a CLI arg).
    if ssl_args == "ssl_true":
        alice = NodeClient(hook, "wss://10.0.17.6:6666", id="alice")
    else:
        alice = NodeClient(hook, "ws://10.0.17.6:6666", id="alice")
    # bob = NodeClient(hook, "ws://172.16.179.22:6667" , id="bob")
    # charlie = NodeClient(hook, "ws://172.16.179.23:6668", id="charlie")
    # med24 = NodeClient(hook, "ws://172.16.179.24:6669", id="med24")
    # testing = NodeClient(hook, "ws://localhost:6669" , id="testing")
    # worker_list = [alice, bob, charlie]
    worker_list = [alice]
    grid = sy.PrivateGridNetwork(*worker_list)
    for epoch in range(epoch_num):
        logger.info("round %s/%s", epoch, epoch_num)
        for worker in worker_list:
            built_model.id = "GlobalModel"
            # built_loss_fn.id = "LossFunc"
            # model_config = sy.ModelConfig(model=built_model,
            #                               loss_fn=built_loss_fn,
            #                               optimizer="SGD",
            #                               batch_size=batch_size,
            #                               optimizer_args={"lr": lr},
            #                               epochs=1,
            #                               max_nr_batches=-1)
            # Time a single model send to this worker.
            model_send_start = time.time()
            ##pdb.set_trace()
            built_model.send(worker)
            model_send_end = time.time()
            # print("[TEST]", "ModelSend", "time", model_send_start, model_send_end)
            print("[trace] ModelSend duration", worker.id, model_send_end - model_send_start)
        # Drop stale pointers so the next round re-sends clean copies.
        built_model.pointers = {}
        built_loss_fn.pointers = {}
        # decay learning rate (floored at 1% of the initial lr)
        learning_rate = max(0.98 * learning_rate, lr * 0.01)
async def main():
    """Federated training driver for VGG16 over three grid workers.

    Each round: fit the model on every worker concurrently, collect the
    encrypted parameter shares, sum them layer by layer, then decrypt and
    average by total training-data count, writing the result back into the
    local model's parameters. Timing traces are printed per phase.
    """
    hook = sy.TorchHook(torch)
    device = torch.device("cpu")
    model = vgg.vgg16(pretrained=False)
    # pdb.set_trace()
    # Trace the Plan with a dummy CIFAR-shaped batch (64x3x32x32).
    model.build(torch.zeros([64, 3, 32, 32], dtype=torch.float).to(device))
    # pdb.set_trace()

    @sy.func2plan()
    def loss_fn(pred, target):
        return nll_loss(input=pred, target=target)

    # Build the loss Plan with dummy log-softmax predictions and labels.
    input_num = torch.randn(3, 5, requires_grad=True)
    target = torch.tensor([1, 0, 4])
    dummy_pred = F.log_softmax(input_num, dim=1)
    loss_fn.build(dummy_pred, target)
    epoch_num = 11
    batch_size = 64
    lr = 0.05
    learning_rate = lr
    optimizer_args = {"lr": lr}
    alice = NodeClient(hook, "ws://172.16.179.20:6666", id="alice")
    bob = NodeClient(hook, "ws://172.16.179.21:6667", id="bob")
    charlie = NodeClient(hook, "ws://172.16.179.22:6668", id="charlie")
    # testing = NodeClient(hook, "ws://localhost:6669" , id="testing")
    worker_list = [alice, bob, charlie]
    grid = sy.PrivateGridNetwork(*worker_list)
    for epoch in range(epoch_num):
        logger.info("Training round %s/%s", epoch, epoch_num)
        round_start_time = time.time()
        # Train on every worker concurrently. fit_model_on_worker is defined
        # elsewhere; per the unpacking below it returns
        # (worker_id, enc_params, worker_loss, num_of_training_data).
        results = await asyncio.gather(*[
            fit_model_on_worker(
                worker=worker,
                built_model=model,
                built_loss_fn=loss_fn,
                encrypters=worker_list,
                batch_size=batch_size,
                curr_round=epoch,
                max_nr_batches=-1,
                lr=0.1,
            )
            for worker in worker_list
        ])
        local_train_end_time = time.time()
        print("[trace]", "AllWorkersTrainingTime", "duration", "COORD", local_train_end_time - round_start_time)
        # Collect per-worker results, skipping workers that returned no params.
        enc_models = {}
        loss_values = {}
        data_amounts = {}
        total_data_amount = 0
        for worker_id, enc_params, worker_loss, num_of_training_data in results:
            if enc_params is not None:
                enc_models[worker_id] = enc_params
                loss_values[worker_id] = worker_loss
                data_amounts[worker_id] = num_of_training_data
                total_data_amount += num_of_training_data
        ## aggregation
        nr_enc_models = len(enc_models)
        enc_models_list = list(enc_models.values())
        data_amounts_list = list(data_amounts.values())
        ##
        # Sum every other worker's encrypted parameters into the first
        # worker's list, in place, layer by layer.
        dst_enc_model = enc_models_list[0]
        aggregation_start_time = time.time()
        # NOTE(review): loop nesting below was reconstructed from a collapsed
        # source line — verify against the original formatting.
        with torch.no_grad():
            for i in range(len(dst_enc_model)):
                for j in range(1, nr_enc_models):
                    dst_enc_model[i] += enc_models_list[j][i]
        aggregation_end_time = time.time()
        print("[trace]", "AggregationTime", "duration", "COORD", aggregation_end_time - aggregation_start_time)
        ## decryption
        # Fetch each summed share, decode fixed precision, divide by the total
        # sample count (weighted average), and write into the local model.
        new_params = []
        decryption_start_time = time.time()
        with torch.no_grad():
            for i in range(len(dst_enc_model)):
                decrypt_para = dst_enc_model[i].get()
                new_para = decrypt_para.float_precision()
                new_para = new_para / int(total_data_amount)
                model.parameters()[i].set_(new_para)
        round_end_time = time.time()
        print("[trace]", "DecryptionTime", "duration", "COORD", round_end_time - decryption_start_time)
        print("[trace]", "RoundTime", "duration", "COORD", round_end_time - round_start_time)
        ## FedAvg
        # nr_models = len(models)
        # model_list = list(models.values())
        # dst_model = model_list[0]
        # nr_params = len(dst_model.parameters())
        # with torch.no_grad():
        #     for i in range(1, nr_models):
        #         src_model = model_list[i]
        #         src_params = src_model.parameters()
        #         dst_params = dst_model.parameters()
        #         for i in range(nr_params):
        #             dst_params[i].set_(src_params[i].data + dst_params[i].data)
        #     for i in range(nr_params):
        #         dst_params[i].set_(dst_params[i].data * 1/total_data_amount)
        # if epoch%5 == 0 or epoch == 49:
        #     evaluate_model_on_worker(
        #         model_identifier="Federated model",
        #         worker=testing,
        #         dataset_key="mnist_testing",
        #         model=model,
        #         built_loss_fn=loss_fn,
        #         nr_bins=10,
        #         batch_size=64,
        #         device=device,
        #         print_target_hist=False,
        #     )
        # Drop stale pointers so the next round re-sends clean copies.
        model.pointers = {}
        loss_fn.pointers = {}
        # decay learning rate (floored at 1% of the initial lr)
        learning_rate = max(0.98 * learning_rate, lr * 0.01)
from torchvision import datasets, transforms
import tqdm
import torch as th
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

hook = sy.TorchHook(torch)

# Connect directly to grid nodes
nodes = ["ws://localhost:3000/", "ws://localhost:3001/"]
compute_nodes = []
for node in nodes:
    compute_nodes.append(NodeClient(hook, node))

# Load the full MNIST training set in a single batch of N_SAMPLES.
N_SAMPLES = 10000
MNIST_PATH = './dataset'
# Standard MNIST normalization constants (mean 0.1307, std 0.3081).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307, ), (0.3081, )),
])
trainset = datasets.MNIST(MNIST_PATH, download=True, train=True, transform=transform)
# NOTE(review): the statement below continues past the end of this chunk —
# remaining DataLoader arguments are not visible here.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=N_SAMPLES,
def create_websocket_client(hook, port, id):
    """Create a NodeClient for a node running on localhost at *port*.

    :param hook: the syft TorchHook instance
    :param port: port number as a string (concatenated into the URL)
    :param id: identifier to assign to the client
    :return: the constructed NodeClient

    NOTE(review): the URL uses the "http://" scheme although the helper name
    says websocket — confirm NodeClient accepts/upgrades this, or whether
    "ws://" was intended.
    """
    url = "http://localhost:" + port + "/"
    return NodeClient(hook, url, id=id)
import sys

# Width of the model's input/output layers, taken from the command line.
node_num = int(sys.argv[1])


# Model
class Net(sy.Plan):
    # Two-layer MLP Plan: node_num -> 1 -> node_num, log-softmax output.
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(node_num, 1)
        self.fc2 = nn.Linear(1, node_num)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)


hook = sy.TorchHook(torch)
alice = NodeClient(hook, "ws://10.0.17.6:6666", id="flvm-2")

# Benchmark: 21 rounds of building a fresh model, sending it to the worker,
# and timing how long it takes to fetch it back.
for i in range(21):
    model = Net()
    model.build(torch.zeros([1, node_num], dtype=torch.float))
    ptr_model = model.send(alice)
    start_time = time.time()
    m = ptr_model.get()
    end_time = time.time()
    print("[PROF]", "GetTime", "duration", "COORD", end_time - start_time)
# Secret-sharing setup: connect to three workers and additively share two
# fixed-precision tensors among them.

# stdlib
import argparse
import logging
import os
import sys  # fix: `sys` was used below without being imported

# third-party
import numpy as np
import torch
from torchvision import datasets
from torchvision import transforms

import syft as sy
from syft.workers import websocket_client

# Tensor length, taken from the command line.
node_num = int(sys.argv[1])

LOG_INTERVAL = 25
# fix: `logging` was used below without being imported
logger = logging.getLogger("run_websocket_client")

hook = sy.TorchHook(torch)
# NOTE(review): NodeClient is not imported in this chunk — presumably provided
# elsewhere in the file (e.g. from syft.grid); confirm.
alice = NodeClient(hook, "ws://172.16.179.20:6666", id="alice")
bob = NodeClient(hook, "ws://172.16.179.21:6667", id="bob")
charlie = NodeClient(hook, "ws://172.16.179.22:6668", id="charlie")

# Two constant tensors (all 3s and all 4s) encoded as fixed precision.
num_a = torch.ones([node_num])
num_a = num_a * 3
fix_a = num_a.fix_precision()
num_b = torch.ones([node_num])
num_b = num_b * 4
fix_b = num_b.fix_precision()

## encrypt
# Additively secret-share both tensors across the three workers.
enc_a = fix_a.share(alice, bob, charlie)
enc_b = fix_b.share(alice, bob, charlie)