def __init__(self,
             x,  # data
             y,  # labels (may be None)
             client_id,  # id of the client
             config,  # configuration dict
             seed: int = 51550):  # seed for random gen
    """Set up the client: read hyper-parameters from ``config``, split
    the data into train/test sets, and initialise the mutable state
    used during the federated rounds."""
    # --- hyper-parameters straight from the configuration dict ---
    self.n_clusters = config['n_clusters']
    self.ae_dims = config['ae_dims']
    self.ae_local_epochs = config['ae_local_epochs']
    self.ae_optimizer = SGD(lr=config['ae_lr'],
                            momentum=config['ae_momentum'])
    self.ae_loss = config['ae_loss']
    self.cl_local_epochs = config['cl_local_epochs']
    self.cl_optimizer = SGD(lr=config['cl_lr'],
                            momentum=config['cl_momentum'])
    self.cl_loss = config['cl_loss']
    self.update_interval = config['update_interval']
    self.kmeans_local_epochs = config['kmeans_local_epochs']
    self.kmeans_n_init = config['kmeans_n_init']
    # --- train/test split; labels are split alongside x when present ---
    if y is not None:
        (self.x_train, self.y_train,
         self.x_test, self.y_test) = split_dataset(x, y)
    else:
        self.x_train, self.x_test = split_dataset(x)
        self.y_train = self.y_test = None
    self.batch_size = config['batch_size']
    self.client_id = client_id
    self.seed = seed
    # --- state filled in lazily during the federated rounds ---
    self.autoencoder = None
    self.encoder = None
    self.clustering_model = None
    self.cluster_centers = None
    self.f_round = 0
    self.local_iter = 0
    self.p = None
    self.step = None
def __init__(self,
             x,
             y,
             client_id,
             config,
             outcomes=None,
             ids=None,
             output_folder=None,
             seed: int = 51550):
    """K-means federated client: split the data into the configured
    cross-validation fold, keep the k-means hyper-parameters, and
    prepare the (optional) output directory."""
    # cross-validation indices for the requested fold
    train_idx, test_idx = split_dataset(x=x,
                                        splits=config['splits'],
                                        shuffle=config['shuffle'],
                                        fold_n=config['fold_n'])
    self.x_train = x[train_idx]
    self.x_test = x[test_idx]
    # optional companions default to None unless provided
    self.y_train = self.y_test = None
    self.outcomes_train = self.outcomes_test = None
    self.ids_train = self.ids_test = None
    if y is not None:
        self.y_train = y[train_idx]
        self.y_test = y[test_idx]
    if outcomes is not None:
        self.outcomes_train = outcomes[train_idx]
        self.outcomes_test = outcomes[test_idx]
    if ids is not None:
        self.ids_train = ids[train_idx]
        self.ids_test = ids[test_idx]
    self.client_id = client_id
    self.seed = seed
    # k-means hyper-parameters
    self.kmeans_local_epochs = config['kmeans_local_epochs']
    self.n_clusters = config['n_clusters']
    self.kmeans_n_init = config['kmeans_n_init']
    # runtime state, filled in during the federated rounds
    self.kmeans = None
    self.f_round = 0
    self.p = None
    self.step = None
    # output directory stays None when no folder is given
    if output_folder is None:
        self.out_dir = None
    else:
        self.out_dir = pathlib.Path(output_folder)
        os.makedirs(self.out_dir, exist_ok=True)
def __init__(
        self,
        x,  # data
        y,  # labels
        client_id,  # id of the client
        config,  # configuration dictionary
        outcomes=None,  # outcomes for lifelines
        ids=None,  # ids of data
        output_folder=None,
        seed: int = 51550):  # seed for random gen
    """Autoencoder + clustering federated client.

    Reads all training hyper-parameters from ``config``, splits ``x``
    (and the optional ``y``/``outcomes``/``ids``) into the configured
    cross-validation fold, and initialises the state mutated during
    the federated rounds.
    """
    # --- hyper-parameters ---
    self.n_clusters = config['n_clusters']
    self.ae_dims = config['ae_dims']
    self.ae_local_epochs = config['ae_local_epochs']
    self.ae_optimizer = SGD(lr=config['ae_lr'],
                            momentum=config['ae_momentum'])
    self.ae_loss = config['ae_loss']
    self.cl_local_epochs = config['cl_local_epochs']
    # FIX: cl_optimizer was constructed twice (first with `learning_rate=`,
    # then immediately overwritten with `lr=`); the dead first construction
    # has been removed. Effective behavior is unchanged.
    self.cl_optimizer = SGD(lr=config['cl_lr'],
                            momentum=config['cl_momentum'])
    self.cl_loss = config['cl_loss']
    self.update_interval = config['update_interval']
    self.kmeans_n_init = config['kmeans_n_init']
    self.kmeans_local_epochs = config['kmeans_local_epochs']
    # --- cross-validation split for the requested fold ---
    train_idx, test_idx = split_dataset(x=x,
                                        splits=config['splits'],
                                        shuffle=config['shuffle'],
                                        fold_n=config['fold_n'])
    self.x_train = x[train_idx]
    self.x_test = x[test_idx]
    # optional companions default to None unless provided
    self.y_train = self.y_test = None
    self.outcomes_train = self.outcomes_test = None
    self.id_train = self.id_test = None
    if y is not None:
        self.y_train = y[train_idx]
        self.y_test = y[test_idx]
    if outcomes is not None:
        self.outcomes_train = outcomes[train_idx]
        self.outcomes_test = outcomes[test_idx]
    if ids is not None:
        self.id_train = ids[train_idx]
        self.id_test = ids[test_idx]
    self.batch_size = config['batch_size']
    self.client_id = client_id
    self.seed = seed
    # output directory stays None when no folder is given
    if output_folder is None:
        self.out_dir = output_folder
    else:
        self.out_dir = pathlib.Path(output_folder)
        os.makedirs(self.out_dir, exist_ok=True)
    # --- state filled in lazily during the federated rounds ---
    self.autoencoder = None
    self.encoder = None
    self.clustering_model = None
    self.f_round = 0
    self.p = None
    self.local_iter = 0
    self.step = None
    self.cluster_centers = None
def __init__(self,
             x,
             y,
             config,
             ids=None,
             outcomes=None,
             client_id: int = 0,
             hardw_acc_flag: bool = False,
             output_folder=None):
    """GAN-based clustering client (generator / encoder / discriminator).

    Builds the three networks from ``config``, splits ``x``/``y`` into
    the configured cross-validation fold, wraps them in DataLoaders,
    and creates the Adam optimizers plus the bookkeeping state used
    during federated training.
    """
    # Training details
    self.n_epochs = config['n_local_epochs']
    self.lr = config['learning_rate']
    self.b1 = config['beta_1']
    self.b2 = config['beta_2']
    self.decay = config['decay']
    # presumably discriminator steps per generator/encoder step — TODO confirm
    self.n_skip_iter = config['d_step']

    # Data dimensions
    self.x_shape = config['x_shape']

    # Latent space info
    self.latent_dim = config['latent_dim']
    self.n_c = config['n_clusters']
    self.betan = config['betan']
    self.betac = config['betac']

    # Wasserstein+GP metric flag
    self.wass_metric = config['wass_metric']
    # FIX: corrected the misspelled 'Wassestrain' in the log message.
    print('Using metric {}'.format(
        'Wasserstein' if self.wass_metric else 'Vanilla'))

    # Use CUDA only when it is available AND explicitly requested.
    self.cuda = torch.cuda.is_available() and hardw_acc_flag
    self.device = torch.device('cuda:0' if self.cuda else 'cpu')
    print('Using device {}'.format(self.device))
    # NOTE(review): anomaly detection slows autograd considerably;
    # consider disabling it outside of debugging sessions.
    torch.autograd.set_detect_anomaly(True)

    # Loss functions
    self.bce_loss = torch.nn.BCELoss()
    self.xe_loss = torch.nn.CrossEntropyLoss()
    self.mse_loss = torch.nn.MSELoss()

    # Initialize NNs (convolutional or fully-specified variants)
    if config['conv_net']:
        self.generator = ConvGeneratorCNN(self.latent_dim, self.n_c,
                                          self.x_shape)
        self.encoder = ConvEncoderCNN(self.latent_dim, self.n_c)
        self.discriminator = ConvDiscriminatorCNN(
            wass_metric=self.wass_metric)
    else:
        self.generator = GeneratorCNN(latent_dim=self.latent_dim,
                                      n_c=self.n_c,
                                      gen_dims=config['gen_dims'],
                                      x_shape=self.x_shape)
        self.encoder = EncoderCNN(latent_dim=self.latent_dim,
                                  enc_dims=config['enc_dims'],
                                  n_c=self.n_c)
        self.discriminator = DiscriminatorCNN(
            disc_dims=config['disc_dims'],
            wass_metric=self.wass_metric)
    if self.cuda:
        self.generator.cuda()
        self.encoder.cuda()
        self.discriminator.cuda()
        self.bce_loss.cuda()
        self.xe_loss.cuda()
        self.mse_loss.cuda()
    self.TENSOR = torch.cuda.FloatTensor if self.cuda else torch.FloatTensor

    # Configure data loaders for the requested cross-validation fold
    self.batch_size = config['batch_size']
    train_idx, test_idx = split_dataset(x=x,
                                        splits=config['splits'],
                                        shuffle=config['shuffle'],
                                        fold_n=config['fold_n'])
    self.x_train = x[train_idx]
    self.y_train = y[train_idx]
    self.x_test = x[test_idx]
    self.y_test = y[test_idx]
    if outcomes is None or ids is None:
        # plain (x, y) datasets when ids/outcomes are not supplied
        self.trainloader = DataLoader(PrepareDataSimple(self.x_train,
                                                        y=self.y_train),
                                      batch_size=self.batch_size)
        self.testloader = DataLoader(PrepareDataSimple(self.x_test,
                                                       y=self.y_test),
                                     batch_size=self.batch_size)
    else:
        self.id_train = ids[train_idx]
        self.outcomes_train = outcomes[train_idx]
        self.id_test = ids[test_idx]
        self.outcomes_test = outcomes[test_idx]
        self.trainloader = DataLoader(PrepareData(
            x=self.x_train,
            y=self.y_train,
            ids=self.id_train,
            outcomes=self.outcomes_train),
            batch_size=self.batch_size)
        self.testloader = DataLoader(PrepareData(
            x=self.x_test,
            y=self.y_test,
            ids=self.id_test,
            outcomes=self.outcomes_test),
            batch_size=self.batch_size)

    # Optimizers: generator+encoder share one Adam, discriminator its own
    self.ge_chain = ichain(self.generator.parameters(),
                           self.encoder.parameters())
    self.optimizer_GE = torch.optim.Adam(self.ge_chain,
                                         lr=self.lr,
                                         betas=(self.b1, self.b2),
                                         weight_decay=self.decay)
    self.optimizer_D = torch.optim.Adam(self.discriminator.parameters(),
                                        lr=self.lr,
                                        betas=(self.b1, self.b2))

    # ----------
    #  Training
    # ----------
    # loss/assignment histories accumulated across rounds
    self.ge_l = []
    self.d_l = []
    self.c_zn = []
    self.c_zc = []
    self.c_i = []

    # metrics (filled in during training)
    self.img_mse_loss = None
    self.lat_mse_loss = None
    self.lat_xe_loss = None

    # lengths of NN parameter lists to send and receive
    self.g_w_l = len(self.generator.state_dict().items())
    self.d_w_l = len(self.discriminator.state_dict().items())
    self.e_w_l = len(self.encoder.state_dict().items())

    # initializing the federated epochs counter to zero
    self.f_epoch = 0

    # for saving images
    self.save_images = config['save_images']
    self.client_id = client_id
    if output_folder is None:
        self.out_dir = output_folder
        self.img_dir = 'client_%d_images' % (self.client_id)
        os.makedirs(self.img_dir, exist_ok=True)
    else:
        self.out_dir = pathlib.Path(output_folder)
        os.makedirs(self.out_dir, exist_ok=True)
        # NOTE(review): self.img_dir is never set on this branch; any code
        # reading self.img_dir will raise AttributeError when an
        # output_folder is supplied — confirm intended behavior.
for label, row in prob.iterrows(): if np.sum(row) > 0: y.append(row.argmax()) else: y.append(-1) y = np.array(y) # getting the outcomes outcomes = data_util.get_outcome_euromds_dataset() # getting IDs ids = data_util.get_euromds_ids() n_features = len(x.columns) x = np.array(x) outcomes = np.array(outcomes) ids = np.array(ids) # cross-val train_idx, test_idx = data_util.split_dataset(x=x, splits=5, fold_n=0) # dividing data x_train = x[train_idx] y_train = y[train_idx] id_train = ids[train_idx] outcomes_train = outcomes[train_idx] x_test = x[test_idx] y_test = y[test_idx] id_test = ids[test_idx] outcomes_test = outcomes[test_idx] dataloader = DataLoader(data_util.PrepareData(x=x_train, y=y_train, ids=id_train, outcomes=outcomes_train), batch_size=batch_size) testloader = DataLoader(data_util.PrepareData(x=x_test,