Пример #1
0
 def __init__(
         self,
         x,  # data matrix
         y,  # labels, may be None
         client_id,  # identifier of this client
         config,  # configuration dict
         seed: int = 51550):  # seed for random generation
     """Store hyper-parameters from *config*, build the SGD optimizers
     and split the data into train/test partitions."""
     # Plain copies from the configuration (order preserved).
     for key in ('n_clusters', 'ae_dims', 'ae_local_epochs'):
         setattr(self, key, config[key])
     # Optimizer for the autoencoder pre-training phase.
     self.ae_optimizer = SGD(lr=config['ae_lr'],
                             momentum=config['ae_momentum'])
     self.ae_loss = config['ae_loss']
     self.cl_local_epochs = config['cl_local_epochs']
     # Optimizer for the clustering phase.
     self.cl_optimizer = SGD(lr=config['cl_lr'],
                             momentum=config['cl_momentum'])
     for key in ('cl_loss', 'update_interval',
                 'kmeans_local_epochs', 'kmeans_n_init'):
         setattr(self, key, config[key])
     # Split data; without labels only x is partitioned.
     if y is None:
         self.x_train, self.x_test = split_dataset(x)
         self.y_train = self.y_test = None
     else:
         (self.x_train, self.y_train,
          self.x_test, self.y_test) = split_dataset(x, y)
     self.batch_size = config['batch_size']
     self.client_id = client_id
     self.seed = seed
     # Placeholders populated later during federated training.
     for attr in ('autoencoder', 'encoder', 'clustering_model',
                  'p', 'step', 'cluster_centers'):
         setattr(self, attr, None)
     self.f_round = 0
     self.local_iter = 0
Пример #2
0
    def __init__(self,
                 x,
                 y,
                 client_id,
                 config,
                 outcomes=None,
                 ids=None,
                 output_folder=None,
                 seed: int = 51550):
        """Split *x* (and the optional companion arrays) into train/test
        folds and store the client's k-means settings."""
        # Fold indices come from the configured cross-validation split.
        train_idx, test_idx = split_dataset(x=x,
                                            splits=config['splits'],
                                            shuffle=config['shuffle'],
                                            fold_n=config['fold_n'])
        self.x_train = x[train_idx]
        self.x_test = x[test_idx]
        # Optional companion arrays default to None when not supplied.
        self.y_train = self.y_test = None
        self.outcomes_train = self.outcomes_test = None
        self.ids_train = self.ids_test = None
        if y is not None:
            self.y_train = y[train_idx]
            self.y_test = y[test_idx]
        if outcomes is not None:
            self.outcomes_train = outcomes[train_idx]
            self.outcomes_test = outcomes[test_idx]
        if ids is not None:
            self.ids_train = ids[train_idx]
            self.ids_test = ids[test_idx]
        self.client_id = client_id
        self.seed = seed
        # k-means hyper-parameters copied straight from the configuration.
        for key in ('kmeans_local_epochs', 'n_clusters', 'kmeans_n_init'):
            setattr(self, key, config[key])
        # State populated later during federated rounds.
        self.kmeans = None
        self.f_round = 0
        self.p = None
        self.step = None
        # out_dir stays None when no output folder was requested.
        if output_folder is None:
            self.out_dir = None
        else:
            self.out_dir = pathlib.Path(output_folder)
            os.makedirs(self.out_dir, exist_ok=True)
Пример #3
0
    def __init__(
            self,
            x,  # data
            y,  # labels, may be None
            client_id,  # id of the client
            config,  # configuration dictionary
            outcomes=None,  # outcomes for lifelines
            ids=None,  # ids of data
            output_folder=None,
            seed: int = 51550):  # seed for random gen
        """Initialize the client: copy hyper-parameters from *config*,
        build the SGD optimizers, split the data into train/test folds
        and prepare the output directory."""
        # Hyper-parameters from the configuration.
        self.n_clusters = config['n_clusters']
        self.ae_dims = config['ae_dims']
        self.ae_local_epochs = config['ae_local_epochs']
        self.ae_optimizer = SGD(lr=config['ae_lr'],
                                momentum=config['ae_momentum'])
        self.ae_loss = config['ae_loss']
        self.cl_local_epochs = config['cl_local_epochs']
        # NOTE(review): cl_optimizer used to be constructed twice (first
        # with `learning_rate=`, then immediately overwritten with `lr=`);
        # the dead first construction was removed — final value unchanged.
        self.cl_optimizer = SGD(lr=config['cl_lr'],
                                momentum=config['cl_momentum'])
        self.cl_loss = config['cl_loss']
        self.update_interval = config['update_interval']
        self.kmeans_n_init = config['kmeans_n_init']
        self.kmeans_local_epochs = config['kmeans_local_epochs']

        # Cross-validation fold split computed on indices.
        train_idx, test_idx = split_dataset(x=x,
                                            splits=config['splits'],
                                            shuffle=config['shuffle'],
                                            fold_n=config['fold_n'])

        self.x_train = x[train_idx]
        self.x_test = x[test_idx]
        # Optional companion arrays are None unless provided by the caller.
        self.y_train = self.y_test = None
        self.outcomes_train = self.outcomes_test = None
        self.id_train = self.id_test = None
        if y is not None:
            self.y_train = y[train_idx]
            self.y_test = y[test_idx]
        if outcomes is not None:
            self.outcomes_train = outcomes[train_idx]
            self.outcomes_test = outcomes[test_idx]
        if ids is not None:
            self.id_train = ids[train_idx]
            self.id_test = ids[test_idx]

        self.batch_size = config['batch_size']
        self.client_id = client_id
        self.seed = seed

        # Output directory: created on demand, stays None when not given.
        if output_folder is None:
            self.out_dir = output_folder
        else:
            self.out_dir = pathlib.Path(output_folder)
            os.makedirs(self.out_dir, exist_ok=True)

        # Default state populated during federated training.
        self.autoencoder = None
        self.encoder = None
        self.clustering_model = None
        self.f_round = 0
        self.p = None
        self.local_iter = 0
        self.step = None
        self.cluster_centers = None
Пример #4
0
    def __init__(self,
                 x,
                 y,
                 config,
                 ids=None,
                 outcomes=None,
                 client_id: int = 0,
                 hardw_acc_flag: bool = False,
                 output_folder=None):
        """Set up a GAN-based federated client.

        Builds the generator/encoder/discriminator networks described by
        *config*, splits *x*/*y* into train/test folds, and prepares the
        data loaders and Adam optimizers used during local training.
        """
        # Training details
        self.n_epochs = config['n_local_epochs']
        self.lr = config['learning_rate']
        self.b1 = config['beta_1']
        self.b2 = config['beta_2']
        self.decay = config['decay']
        self.n_skip_iter = config['d_step']

        # Data dimensions
        self.x_shape = config['x_shape']
        # Latent space info
        self.latent_dim = config['latent_dim']
        self.n_c = config['n_clusters']
        self.betan = config['betan']
        self.betac = config['betac']

        # Wasserstein + gradient-penalty metric flag
        self.wass_metric = config['wass_metric']
        print('Using metric {}'.format(
            'Wasserstein' if self.wass_metric else 'Vanilla'))

        # Use CUDA only when it is available AND explicitly requested.
        self.cuda = torch.cuda.is_available() and hardw_acc_flag
        self.device = torch.device('cuda:0' if self.cuda else 'cpu')
        print('Using device {}'.format(self.device))
        # NOTE(review): anomaly detection slows training considerably;
        # consider disabling it outside debugging sessions.
        torch.autograd.set_detect_anomaly(True)

        # Loss functions
        self.bce_loss = torch.nn.BCELoss()
        self.xe_loss = torch.nn.CrossEntropyLoss()
        self.mse_loss = torch.nn.MSELoss()

        # Initialize NNs: convolutional or fully-connected variants.
        if config['conv_net']:
            self.generator = ConvGeneratorCNN(self.latent_dim, self.n_c,
                                              self.x_shape)
            self.encoder = ConvEncoderCNN(self.latent_dim, self.n_c)
            self.discriminator = ConvDiscriminatorCNN(
                wass_metric=self.wass_metric)
        else:
            self.generator = GeneratorCNN(latent_dim=self.latent_dim,
                                          n_c=self.n_c,
                                          gen_dims=config['gen_dims'],
                                          x_shape=self.x_shape)
            self.encoder = EncoderCNN(latent_dim=self.latent_dim,
                                      enc_dims=config['enc_dims'],
                                      n_c=self.n_c)
            self.discriminator = DiscriminatorCNN(
                disc_dims=config['disc_dims'], wass_metric=self.wass_metric)

        # Move networks and losses to the GPU when enabled.
        if self.cuda:
            self.generator.cuda()
            self.encoder.cuda()
            self.discriminator.cuda()
            self.bce_loss.cuda()
            self.xe_loss.cuda()
            self.mse_loss.cuda()
        self.TENSOR = torch.cuda.FloatTensor if self.cuda else torch.FloatTensor

        # Configure data loader
        self.batch_size = config['batch_size']
        train_idx, test_idx = split_dataset(x=x,
                                            splits=config['splits'],
                                            shuffle=config['shuffle'],
                                            fold_n=config['fold_n'])

        self.x_train = x[train_idx]
        self.y_train = y[train_idx]
        self.x_test = x[test_idx]
        self.y_test = y[test_idx]

        # With both ids and outcomes supplied the richer PrepareData dataset
        # is used; otherwise fall back to the simple (x, y) dataset.
        if outcomes is None or ids is None:
            self.trainloader = DataLoader(PrepareDataSimple(self.x_train,
                                                            y=self.y_train),
                                          batch_size=self.batch_size)
            self.testloader = DataLoader(PrepareDataSimple(self.x_test,
                                                           y=self.y_test),
                                         batch_size=self.batch_size)
        else:
            self.id_train = ids[train_idx]
            self.outcomes_train = outcomes[train_idx]
            self.id_test = ids[test_idx]
            self.outcomes_test = outcomes[test_idx]
            self.trainloader = DataLoader(PrepareData(
                x=self.x_train,
                y=self.y_train,
                ids=self.id_train,
                outcomes=self.outcomes_train),
                                          batch_size=self.batch_size)
            self.testloader = DataLoader(PrepareData(
                x=self.x_test,
                y=self.y_test,
                ids=self.id_test,
                outcomes=self.outcomes_test),
                                         batch_size=self.batch_size)

        # Generator and encoder share one optimizer over a chained
        # parameter iterator.
        self.ge_chain = ichain(self.generator.parameters(),
                               self.encoder.parameters())

        self.optimizer_GE = torch.optim.Adam(self.ge_chain,
                                             lr=self.lr,
                                             betas=(self.b1, self.b2),
                                             weight_decay=self.decay)
        self.optimizer_D = torch.optim.Adam(self.discriminator.parameters(),
                                            lr=self.lr,
                                            betas=(self.b1, self.b2))

        # ----------
        #  Training
        # ----------
        # Histories of losses and cluster metrics collected during training.
        self.ge_l = []
        self.d_l = []
        self.c_zn = []
        self.c_zc = []
        self.c_i = []

        # metrics
        self.img_mse_loss = None
        self.lat_mse_loss = None
        self.lat_xe_loss = None

        # Lengths of the NN parameter dicts to send and receive.
        self.g_w_l = len(self.generator.state_dict().items())
        self.d_w_l = len(self.discriminator.state_dict().items())
        self.e_w_l = len(self.encoder.state_dict().items())

        # Initializing the federated epochs counter to zero.
        self.f_epoch = 0

        # for saving images
        self.save_images = config['save_images']
        self.client_id = client_id
        # NOTE(review): img_dir is only defined on the output_folder-is-None
        # branch; callers that pass an output folder and later read
        # self.img_dir will hit AttributeError — confirm intended behavior.
        if output_folder is None:
            self.out_dir = output_folder
            self.img_dir = 'client_%d_images' % (self.client_id)
            os.makedirs(self.img_dir, exist_ok=True)
        else:
            self.out_dir = pathlib.Path(output_folder)
            os.makedirs(self.out_dir, exist_ok=True)
Пример #5
0
 # NOTE(review): script fragment — `prob`, `y`, `x` and `batch_size` are
 # defined in earlier, unseen parts of the file (y is presumably a list).
 # Assign each sample its highest-probability label, or -1 when the row
 # sums to zero (no assignment available).
 for label, row in prob.iterrows():
     if np.sum(row) > 0:
         y.append(row.argmax())
     else:
         y.append(-1)
 y = np.array(y)
 # getting the outcomes
 outcomes = data_util.get_outcome_euromds_dataset()
 # getting IDs
 ids = data_util.get_euromds_ids()
 n_features = len(x.columns)
 # Convert everything to numpy arrays for index-based splitting below.
 x = np.array(x)
 outcomes = np.array(outcomes)
 ids = np.array(ids)
 # cross-validation split: 5 folds, fold 0 selected
 train_idx, test_idx = data_util.split_dataset(x=x, splits=5, fold_n=0)
 # dividing data into train/test partitions via the fold indices
 x_train = x[train_idx]
 y_train = y[train_idx]
 id_train = ids[train_idx]
 outcomes_train = outcomes[train_idx]
 x_test = x[test_idx]
 y_test = y[test_idx]
 id_test = ids[test_idx]
 outcomes_test = outcomes[test_idx]
 # Training loader over the rich dataset (features, labels, ids, outcomes).
 dataloader = DataLoader(data_util.PrepareData(x=x_train,
                                               y=y_train,
                                               ids=id_train,
                                               outcomes=outcomes_train),
                         batch_size=batch_size)
 testloader = DataLoader(data_util.PrepareData(x=x_test,