def test_build_model(self):
    generator = Generator(self._vars['input_dim'], self._vars['layer_dims'],
                          self._vars['output_dim'], self._output_tensor,
                          self._vars['tau'])
    generator.build((self._vars['batch_size'], self._vars['input_dim']))
    self.assertIsNotNone(generator)
    self.assertEqual(len(generator.layers), len(self._vars['layer_dims']) + 1)
def fit(self, train_data, discrete_columns=tuple(), epochs=300,
        log_frequency=True):
    """Fit the CTGAN Synthesizer models to the training data.

    Args:
        train_data (numpy.ndarray or pandas.DataFrame):
            Training Data. It must be a 2-dimensional numpy array or a
            pandas.DataFrame.
        discrete_columns (list-like):
            List of discrete columns to be used to generate the Conditional
            Vector. If ``train_data`` is a Numpy array, this list should
            contain the integer indices of the columns. Otherwise, if it is
            a ``pandas.DataFrame``, this list should contain the column names.
        epochs (int):
            Number of training epochs. Defaults to 300.
        log_frequency (boolean):
            Whether to use log frequency of categorical levels in conditional
            sampling. Defaults to ``True``.
    """
    self.discrete_columns = discrete_columns

    # Fit the one-hot encoder on the discrete columns.
    self.oe = OneHotEncoder(sparse=False)
    if discrete_columns:
        if isinstance(train_data, np.ndarray):
            self.oe.fit(train_data[:, discrete_columns])
            self.cat_column_idxes = discrete_columns
        else:
            self.oe.fit(train_data[discrete_columns])
            features = list(train_data.columns)
            self.cat_column_idxes = [
                features.index(c) for c in discrete_columns
            ]
    else:
        self.cat_column_idxes = []

    self.transformer = DataTransformer()
    self.transformer.fit(train_data, discrete_columns)
    train_data = self.transformer.transform(train_data)

    data_sampler = Sampler(train_data, self.transformer.output_info)
    data_dim = self.transformer.output_dimensions

    self.cond_generator = ConditionalGenerator(
        train_data, self.transformer.output_info, log_frequency)

    self.generator = Generator(
        self.embedding_dim + self.cond_generator.n_opt,
        self.gen_dim, data_dim).to(self.device)
    discriminator = Discriminator(
        data_dim + self.cond_generator.n_opt, self.dis_dim).to(self.device)

    optimizerG = optim.Adam(self.generator.parameters(), lr=self.gen_lr)
    optimizerD = optim.Adam(discriminator.parameters(), lr=self.dis_lr)

    assert self.batch_size % 2 == 0
    mean = torch.zeros(self.batch_size, self.embedding_dim, device=self.device)
    std = mean + 1

    steps_per_epoch = max(len(train_data) // self.batch_size, 1)
    for i in tqdm(range(epochs)):
        for id_ in range(steps_per_epoch):
            fakez = torch.normal(mean=mean, std=std)

            condvec = self.cond_generator.sample(self.batch_size)
            if condvec is None:
                c1, m1, col, opt = None, None, None, None
                real = data_sampler.sample(self.batch_size, col, opt)
            else:
                c1, m1, col, opt = condvec
                c1 = torch.from_numpy(c1).to(self.device)
                m1 = torch.from_numpy(m1).to(self.device)
                fakez = torch.cat([fakez, c1], dim=1)

                perm = np.arange(self.batch_size)
                np.random.shuffle(perm)
                real = data_sampler.sample(self.batch_size, col[perm], opt[perm])
                c2 = c1[perm]

            fake = self.generator(fakez)
            fakeact = self._apply_activate(fake)

            real = torch.from_numpy(real.astype('float32')).to(self.device)

            if c1 is not None:
                fake_cat = torch.cat([fakeact, c1], dim=1)
                real_cat = torch.cat([real, c2], dim=1)
            else:
                real_cat = real
                fake_cat = fake

            y_fake = discriminator(fake_cat)
            y_real = discriminator(real_cat)

            pen = discriminator.calc_gradient_penalty(
                real_cat, fake_cat, self.device)
            loss_d = -(nanmean(y_real) - nanmean(y_fake))

            optimizerD.zero_grad()
            pen.backward(retain_graph=True)
            loss_d.backward()
            optimizerD.step()

            fakez = torch.normal(mean=mean, std=std)
            condvec = self.cond_generator.sample(self.batch_size)

            if condvec is None:
                c1, m1, col, opt = None, None, None, None
            else:
                c1, m1, col, opt = condvec
                c1 = torch.from_numpy(c1).to(self.device)
                m1 = torch.from_numpy(m1).to(self.device)
                fakez = torch.cat([fakez, c1], dim=1)

            fake = self.generator(fakez)
            fakeact = self._apply_activate(fake)

            if c1 is not None:
                y_fake = discriminator(torch.cat([fakeact, c1], dim=1))
            else:
                y_fake = discriminator(fakeact)

            if condvec is None:
                cross_entropy = 0
            else:
                cross_entropy = self._cond_loss(fake, c1, m1)

            loss_g = -nanmean(y_fake) + cross_entropy

            optimizerG.zero_grad()
            loss_g.backward()
            optimizerG.step()

        if self.verbose:
            print("Epoch %d, Loss G: %.4f, Loss D: %.4f" %
                  (i + 1, loss_g.detach().cpu(), loss_d.detach().cpu()),
                  flush=True)

    self.discriminator = discriminator
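# --- Illustrative sketch (not part of the original code) ---
# A minimal, self-contained example of the tensor plumbing used in the training
# loop above: Gaussian noise of shape (batch_size, embedding_dim) is concatenated
# with a conditional one-hot vector before being fed to the generator, and the
# critic loss is the WGAN objective -(mean(y_real) - mean(y_fake)). The sizes
# below are arbitrary; only the shapes and the loss formula mirror the code.
import torch

batch_size, embedding_dim, n_opt = 4, 8, 3

mean = torch.zeros(batch_size, embedding_dim)
std = mean + 1
fakez = torch.normal(mean=mean, std=std)                      # (4, 8) latent noise

c1 = torch.eye(n_opt)[torch.randint(n_opt, (batch_size,))]    # (4, 3) one-hot condvec
fakez = torch.cat([fakez, c1], dim=1)                         # (4, 11) generator input

y_real = torch.randn(batch_size, 1)                           # stand-ins for critic outputs
y_fake = torch.randn(batch_size, 1)
loss_d = -(torch.mean(y_real) - torch.mean(y_fake))           # WGAN critic loss, as above
print(fakez.shape, loss_d.item())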
def fit(self, train_data, eval_interval, eval, discrete_columns=tuple(),
        epochs=300, log_frequency=True):
    """Fit the CTGAN Synthesizer models to the training data.

    Args:
        train_data (numpy.ndarray or pandas.DataFrame):
            Training Data. It must be a 2-dimensional numpy array or a
            pandas.DataFrame.
        eval_interval (int):
            Evaluate the model every ``eval_interval`` epochs.
        eval (bool):
            Whether to print evaluation statistics during training.
        discrete_columns (list-like):
            List of discrete columns to be used to generate the Conditional
            Vector. If ``train_data`` is a Numpy array, this list should
            contain the integer indices of the columns. Otherwise, if it is
            a ``pandas.DataFrame``, this list should contain the column names.
        epochs (int):
            Number of training epochs. Defaults to 300.
        log_frequency (boolean):
            Whether to use log frequency of categorical levels in conditional
            sampling. Defaults to ``True``.
    """
    train = train_data

    self.transformer = DataTransformer()
    self.transformer.fit(train_data, discrete_columns)
    train_data = self.transformer.transform(train_data)

    data_sampler = Sampler(train_data, self.transformer.output_info)
    data_dim = self.transformer.output_dimensions

    self.cond_generator = ConditionalGenerator(
        train_data, self.transformer.output_info, log_frequency)

    self.generator = Generator(
        self.embedding_dim + self.cond_generator.n_opt,
        self.gen_dim, data_dim).to(self.device)
    self.discriminator = Discriminator(
        data_dim + self.cond_generator.n_opt, self.dis_dim).to(self.device)

    optimizerG = optim.Adam(self.generator.parameters(), lr=2e-4,
                            betas=(0.5, 0.9), weight_decay=self.l2scale)
    optimizerD = optim.Adam(self.discriminator.parameters(), lr=2e-4,
                            betas=(0.5, 0.9))

    assert self.batch_size % 2 == 0
    mean = torch.zeros(self.batch_size, self.embedding_dim, device=self.device)
    std = mean + 1

    # figure for plotting gradients; ax1 for generator and ax2 for discriminator
    # fig, (ax1, ax2) = plt.subplots(1, 2)

    steps_per_epoch = max(len(train_data) // self.batch_size, 1)
    for i in range(epochs):
        for id_ in range(steps_per_epoch):
            fakez = torch.normal(mean=mean, std=std)

            condvec = self.cond_generator.sample(self.batch_size)
            if condvec is None:
                c1, m1, col, opt = None, None, None, None
                real = data_sampler.sample(self.batch_size, col, opt)
            else:
                c1, m1, col, opt = condvec
                c1 = torch.from_numpy(c1).to(self.device)
                m1 = torch.from_numpy(m1).to(self.device)
                fakez = torch.cat([fakez, c1], dim=1)

                perm = np.arange(self.batch_size)
                np.random.shuffle(perm)
                real = data_sampler.sample(self.batch_size, col[perm], opt[perm])
                c2 = c1[perm]

            fake = self.generator(fakez)
            fakeact = self._apply_activate(fake)

            real = torch.from_numpy(real.astype('float32')).to(self.device)

            if c1 is not None:
                fake_cat = torch.cat([fakeact, c1], dim=1)
                real_cat = torch.cat([real, c2], dim=1)
            else:
                real_cat = real
                fake_cat = fake

            y_fake = self.discriminator(fake_cat)
            y_real = self.discriminator(real_cat)

            pen = self.discriminator.calc_gradient_penalty(
                real_cat, fake_cat, self.device)
            loss_d = -(torch.mean(y_real) - torch.mean(y_fake))

            optimizerD.zero_grad()
            pen.backward(retain_graph=True)
            loss_d.backward()
            # plot gradients
            # self.plot_grad_flow(self.discriminator.named_parameters(), ax2, 'Gradient flow for D')
            optimizerD.step()

            fakez = torch.normal(mean=mean, std=std)
            condvec = self.cond_generator.sample(self.batch_size)

            if condvec is None:
                c1, m1, col, opt = None, None, None, None
            else:
                c1, m1, col, opt = condvec
                c1 = torch.from_numpy(c1).to(self.device)
                m1 = torch.from_numpy(m1).to(self.device)
                fakez = torch.cat([fakez, c1], dim=1)

            fake = self.generator(fakez)
            fakeact = self._apply_activate(fake)

            if c1 is not None:
                y_fake = self.discriminator(torch.cat([fakeact, c1], dim=1))
            else:
                y_fake = self.discriminator(fakeact)

            if condvec is None:
                cross_entropy = 0
            else:
                cross_entropy = self._cond_loss(fake, c1, m1)

            loss_g = -torch.mean(y_fake) + cross_entropy

            optimizerG.zero_grad()
            loss_g.backward()
            # plot gradients
            # self.plot_grad_flow(self.generator.named_parameters(), ax1, 'Gradient flow for G')
            optimizerG.step()

        print("Epoch %d, Loss G: %.4f, Loss D: %.4f" %
              (i + 1, loss_g.detach().cpu(), loss_d.detach().cpu()),
              flush=True)

        # check model results every `eval_interval` epochs
        # fig2, ax3 = plt.subplots(1)
        if eval:
            stats_real = train[self.demand_column].describe()
            stats_real_week = train.groupby('Weekday')[self.demand_column].describe()
            stats_real_month = train.groupby('Month')[self.demand_column].describe()

            if (i + 1) % eval_interval == 0:
                eval_sample = self.sample(1000)
                sample = pd.DataFrame(eval_sample, columns=eval_sample.columns)
                # sample.loc[:, self.demand_column].hist(bins=50, alpha=0.4, label='fake')
                # ax3.hist(pd.DataFrame(train, columns=train.columns).loc[:, self.demand_column],
                #          bins=50, alpha=0.4, label='real')
                # fig2.legend()
                # fig2.show()
                print((sample[self.demand_column].describe() - stats_real)
                      / stats_real)
                print(' ')
                print(((sample.groupby('Weekday')[self.demand_column].describe()
                        - stats_real_week) / stats_real_week).T)
                print(' ')
                print(((sample.groupby('Month')[self.demand_column].describe()
                        - stats_real_month) / stats_real_month).T)

    plt.show()
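# --- Illustrative sketch (not part of the original code) ---
# The eval block above compares summary statistics of the synthetic sample against
# the real data as a relative difference of pandas describe() outputs. A tiny,
# self-contained version of that comparison; the column names and distributions
# here are made up for the example.
import numpy as np
import pandas as pd

real = pd.DataFrame({'Weekday': np.random.randint(0, 7, 500),
                     'demand': np.random.gamma(2.0, 10.0, 500)})
fake = pd.DataFrame({'Weekday': np.random.randint(0, 7, 500),
                     'demand': np.random.gamma(2.1, 9.5, 500)})

stats_real = real['demand'].describe()
print((fake['demand'].describe() - stats_real) / stats_real)        # overall stats

stats_real_week = real.groupby('Weekday')['demand'].describe()
rel_diff_week = (fake.groupby('Weekday')['demand'].describe()
                 - stats_real_week) / stats_real_week
print(rel_diff_week.T)                                               # per-weekday stats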
def train(self, data, categorical_columns=None, ordinal_columns=None,
          update_epsilon=None):
    if update_epsilon:
        self.epsilon = update_epsilon

    # This is to make sure each partition has enough points; may need to become flexible.
    self.num_teachers = int(len(data) / 5000) + 1

    self.transformer = DataTransformer()
    self.transformer.fit(data, discrete_columns=categorical_columns)
    data = self.transformer.transform(data)
    data_partitions = np.array_split(data, self.num_teachers)

    data_dim = self.transformer.output_dimensions
    self.cond_generator = ConditionalGenerator(
        data, self.transformer.output_info, self.log_frequency)

    # create conditional generator for each teacher model
    cond_generator = [
        ConditionalGenerator(d, self.transformer.output_info, self.log_frequency)
        for d in data_partitions
    ]

    self.generator = Generator(
        self.embedding_dim + self.cond_generator.n_opt,
        self.gen_dim, data_dim).to(self.device)

    discriminator = Discriminator(
        data_dim + self.cond_generator.n_opt,
        self.dis_dim, self.loss, self.pack).to(self.device)

    student_disc = discriminator
    student_disc.apply(weights_init)

    teacher_disc = [discriminator for i in range(self.num_teachers)]
    for i in range(self.num_teachers):
        teacher_disc[i].apply(weights_init)

    optimizerG = optim.Adam(self.generator.parameters(), lr=2e-4,
                            betas=(0.5, 0.9), weight_decay=self.l2scale)
    optimizerS = optim.Adam(student_disc.parameters(), lr=2e-4, betas=(0.5, 0.9))
    optimizerT = [
        optim.Adam(teacher_disc[i].parameters(), lr=2e-4, betas=(0.5, 0.9))
        for i in range(self.num_teachers)
    ]

    criterion = nn.BCELoss()
    noise_multiplier = 1e-3
    alphas = torch.tensor([0.0 for i in range(100)])
    l_list = 1 + torch.tensor(range(100))
    eps = 0

    mean = torch.zeros(self.batch_size, self.embedding_dim, device=self.device)
    std = mean + 1

    REAL_LABEL = 1
    FAKE_LABEL = 0

    while eps < self.epsilon:
        # train teacher discriminators
        for t_2 in range(self.teacher_iters):
            for i in range(self.num_teachers):
                # print('i is {}'.format(i))
                partition_data = data_partitions[i]
                data_sampler = Sampler(partition_data, self.transformer.output_info)
                fakez = torch.normal(mean, std=std)

                condvec = cond_generator[i].sample(self.batch_size)
                if condvec is None:
                    c1, m1, col, opt = None, None, None, None
                    real = data_sampler.sample(self.batch_size, col, opt)
                else:
                    c1, m1, col, opt = condvec
                    c1 = torch.from_numpy(c1).to(self.device)
                    m1 = torch.from_numpy(m1).to(self.device)
                    fakez = torch.cat([fakez, c1], dim=1)

                    perm = np.arange(self.batch_size)
                    np.random.shuffle(perm)
                    real = data_sampler.sample(self.batch_size, col[perm], opt[perm])
                    c2 = c1[perm]

                fake = self.generator(fakez)
                fakeact = self._apply_activate(fake)

                real = torch.from_numpy(real.astype('float32')).to(self.device)

                if c1 is not None:
                    fake_cat = torch.cat([fakeact, c1], dim=1)
                    real_cat = torch.cat([real, c2], dim=1)
                else:
                    real_cat = real
                    fake_cat = fake

                optimizerT[i].zero_grad()

                if self.loss == 'cross_entropy':
                    y_fake = teacher_disc[i](fake_cat)
                    label_fake = torch.full(
                        (int(self.batch_size / self.pack),),
                        FAKE_LABEL, dtype=torch.float, device=self.device)
                    errD_fake = criterion(y_fake, label_fake.float())
                    errD_fake.backward()
                    optimizerT[i].step()

                    # train with real
                    label_true = torch.full(
                        (int(self.batch_size / self.pack),),
                        REAL_LABEL, dtype=torch.float, device=self.device)
                    y_real = teacher_disc[i](real_cat)
                    errD_real = criterion(y_real, label_true.float())
                    errD_real.backward()
                    optimizerT[i].step()

                    loss_d = errD_real + errD_fake

                # print("Iterator is {i}, Loss D for teacher {n} is :{j}".format(
                #     i=t_2 + 1, n=i + 1, j=loss_d.detach().cpu()))

        # train student discriminator
        for t_3 in range(self.student_iters):
            data_sampler = Sampler(data, self.transformer.output_info)
            fakez = torch.normal(mean, std=std)

            condvec = self.cond_generator.sample(self.batch_size)
            if condvec is None:
                c1, m1, col, opt = None, None, None, None
                real = data_sampler.sample(self.batch_size, col, opt)
            else:
                c1, m1, col, opt = condvec
                c1 = torch.from_numpy(c1).to(self.device)
                m1 = torch.from_numpy(m1).to(self.device)
                fakez = torch.cat([fakez, c1], dim=1)

                perm = np.arange(self.batch_size)
                np.random.shuffle(perm)
                real = data_sampler.sample(self.batch_size, col[perm], opt[perm])
                c2 = c1[perm]

            fake = self.generator(fakez)
            fakeact = self._apply_activate(fake)

            if c1 is not None:
                fake_cat = torch.cat([fakeact, c1], dim=1)
            else:
                fake_cat = fake

            fake_data = fake_cat
            predictions, votes = pate(fake_data, teacher_disc, noise_multiplier)
            output = student_disc(fake_data.detach())

            # update moments accountant
            alphas = alphas + moments_acc(self.num_teachers, votes,
                                          noise_multiplier, l_list)

            loss_s = criterion(output, predictions.float().to(self.device))

            optimizerS.zero_grad()
            loss_s.backward()
            optimizerS.step()

            # print('iterator {i}, student discriminator loss is {j}'.format(i=t_3, j=loss_s))

        # train generator
        fakez = torch.normal(mean=mean, std=std)
        condvec = self.cond_generator.sample(self.batch_size)

        if condvec is None:
            c1, m1, col, opt = None, None, None, None
        else:
            c1, m1, col, opt = condvec
            c1 = torch.from_numpy(c1).to(self.device)
            m1 = torch.from_numpy(m1).to(self.device)
            fakez = torch.cat([fakez, c1], dim=1)

        fake = self.generator(fakez)
        fakeact = self._apply_activate(fake)

        if c1 is not None:
            y_fake = student_disc(torch.cat([fakeact, c1], dim=1))
        else:
            y_fake = student_disc(fakeact)

        # if condvec is None:
        cross_entropy = 0
        # else:
        #     cross_entropy = self._cond_loss(fake, c1, m1)

        if self.loss == 'cross_entropy':
            label_g = torch.full((int(self.batch_size / self.pack),),
                                 REAL_LABEL, dtype=torch.float, device=self.device)
            # label_g = torch.full(int(self.batch_size/self.pack,), 1, device=self.device)
            loss_g = criterion(y_fake, label_g.float())
            loss_g = loss_g + cross_entropy
        else:
            loss_g = -torch.mean(y_fake) + cross_entropy

        optimizerG.zero_grad()
        loss_g.backward()
        optimizerG.step()

        print('generator is {}'.format(loss_g.detach().cpu()))

        eps = min((alphas - math.log(self.delta)) / l_list)
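# --- Illustrative sketch (not part of the original code) ---
# The stopping rule above converts the accumulated moments `alphas` into a spent
# privacy budget via eps = min((alphas - log(delta)) / l_list). A self-contained
# numerical example with made-up accumulated log-moments:
import math
import torch

moments_order = 100
delta = 1e-5
l_list = 1 + torch.tensor(range(moments_order), dtype=torch.float)
alphas = 0.05 * l_list                          # pretend accumulated log-moments
eps = min((alphas - math.log(delta)) / l_list)  # tightest bound over all orders
print(float(eps))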
def train(self, data, categorical_columns=None, ordinal_columns=None,
          update_epsilon=None):
    if update_epsilon:
        self.epsilon = update_epsilon

    self.transformer = DataTransformer()
    self.transformer.fit(data, discrete_columns=categorical_columns)
    train_data = self.transformer.transform(data)

    data_sampler = Sampler(train_data, self.transformer.output_info)
    data_dim = self.transformer.output_dimensions

    self.cond_generator = ConditionalGenerator(
        train_data, self.transformer.output_info, self.log_frequency)

    self.generator = Generator(
        self.embedding_dim + self.cond_generator.n_opt,
        self.gen_dim, data_dim).to(self.device)

    discriminator = Discriminator(
        data_dim + self.cond_generator.n_opt,
        self.dis_dim, self.loss, self.pack).to(self.device)

    optimizerG = optim.Adam(self.generator.parameters(), lr=2e-4,
                            betas=(0.5, 0.9), weight_decay=self.l2scale)
    optimizerD = optim.Adam(discriminator.parameters(), lr=2e-4, betas=(0.5, 0.9))

    privacy_engine = opacus.PrivacyEngine(
        discriminator,
        batch_size=self.batch_size,
        sample_size=train_data.shape[0],
        alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
        noise_multiplier=self.sigma,
        max_grad_norm=self.max_per_sample_grad_norm,
        clip_per_layer=True)

    if not self.disabled_dp:
        privacy_engine.attach(optimizerD)

    one = torch.tensor(1, dtype=torch.float).to(self.device)
    mone = one * -1

    REAL_LABEL = 1
    FAKE_LABEL = 0
    criterion = nn.BCELoss()

    assert self.batch_size % 2 == 0
    mean = torch.zeros(self.batch_size, self.embedding_dim, device=self.device)
    std = mean + 1

    steps_per_epoch = len(train_data) // self.batch_size
    for i in range(self.epochs):
        for id_ in range(steps_per_epoch):
            fakez = torch.normal(mean=mean, std=std)

            condvec = self.cond_generator.sample(self.batch_size)
            if condvec is None:
                c1, m1, col, opt = None, None, None, None
                real = data_sampler.sample(self.batch_size, col, opt)
            else:
                c1, m1, col, opt = condvec
                c1 = torch.from_numpy(c1).to(self.device)
                m1 = torch.from_numpy(m1).to(self.device)
                fakez = torch.cat([fakez, c1], dim=1)

                perm = np.arange(self.batch_size)
                np.random.shuffle(perm)
                real = data_sampler.sample(self.batch_size, col[perm], opt[perm])
                c2 = c1[perm]

            fake = self.generator(fakez)
            fakeact = self._apply_activate(fake)

            real = torch.from_numpy(real.astype('float32')).to(self.device)

            if c1 is not None:
                fake_cat = torch.cat([fakeact, c1], dim=1)
                real_cat = torch.cat([real, c2], dim=1)
            else:
                real_cat = real
                fake_cat = fake

            optimizerD.zero_grad()

            if self.loss == 'cross_entropy':
                y_fake = discriminator(fake_cat)
                # print('y_fake is {}'.format(y_fake))
                label_fake = torch.full(
                    (int(self.batch_size / self.pack),),
                    FAKE_LABEL, dtype=torch.float, device=self.device)
                # print('label_fake is {}'.format(label_fake))
                errD_fake = criterion(y_fake, label_fake)
                errD_fake.backward()
                optimizerD.step()

                # train with real
                label_true = torch.full(
                    (int(self.batch_size / self.pack),),
                    REAL_LABEL, dtype=torch.float, device=self.device)
                y_real = discriminator(real_cat)
                errD_real = criterion(y_real, label_true)
                errD_real.backward()
                optimizerD.step()

                loss_d = errD_real + errD_fake
            else:
                y_fake = discriminator(fake_cat)
                mean_fake = torch.mean(y_fake)
                mean_fake.backward(one)

                y_real = discriminator(real_cat)
                mean_real = torch.mean(y_real)
                mean_real.backward(mone)

                optimizerD.step()
                loss_d = -(mean_real - mean_fake)

            max_grad_norm = []
            for p in discriminator.parameters():
                param_norm = p.grad.data.norm(2).item()
                max_grad_norm.append(param_norm)

            # pen = calc_gradient_penalty(discriminator, real_cat, fake_cat, self.device)
            # pen.backward(retain_graph=True)
            # loss_d.backward()
            # optimizerD.step()

            fakez = torch.normal(mean=mean, std=std)
            condvec = self.cond_generator.sample(self.batch_size)

            if condvec is None:
                c1, m1, col, opt = None, None, None, None
            else:
                c1, m1, col, opt = condvec
                c1 = torch.from_numpy(c1).to(self.device)
                m1 = torch.from_numpy(m1).to(self.device)
                fakez = torch.cat([fakez, c1], dim=1)

            fake = self.generator(fakez)
            fakeact = self._apply_activate(fake)

            if c1 is not None:
                y_fake = discriminator(torch.cat([fakeact, c1], dim=1))
            else:
                y_fake = discriminator(fakeact)

            # if condvec is None:
            cross_entropy = 0
            # else:
            #     cross_entropy = self._cond_loss(fake, c1, m1)

            if self.loss == 'cross_entropy':
                label_g = torch.full((int(self.batch_size / self.pack),),
                                     REAL_LABEL, dtype=torch.float, device=self.device)
                # label_g = torch.full(int(self.batch_size/self.pack,), 1, device=self.device)
                loss_g = criterion(y_fake, label_g)
                loss_g = loss_g + cross_entropy
            else:
                loss_g = -torch.mean(y_fake) + cross_entropy

            optimizerG.zero_grad()
            loss_g.backward()
            optimizerG.step()

            if not self.disabled_dp:
                # if self.loss == 'cross_entropy':
                #     autograd_grad_sample.clear_backprops(discriminator)
                # else:
                for p in discriminator.parameters():
                    if hasattr(p, "grad_sample"):
                        del p.grad_sample

                if self.target_delta is None:
                    self.target_delta = 1 / train_data.shape[0]

                epsilon, best_alpha = optimizerD.privacy_engine.get_privacy_spent(
                    self.target_delta)

                self.epsilon_list.append(epsilon)
                self.alpha_list.append(best_alpha)
                # if self.verbose:

        if not self.disabled_dp:
            if self.epsilon < epsilon:
                break

        self.loss_d_list.append(loss_d)
        self.loss_g_list.append(loss_g)
        if self.verbose:
            print("Epoch %d, Loss G: %.4f, Loss D: %.4f" %
                  (i + 1, loss_g.detach().cpu(), loss_d.detach().cpu()),
                  flush=True)
            print('epsilon is {e}, alpha is {a}'.format(e=epsilon, a=best_alpha))

    return self.loss_d_list, self.loss_g_list, self.epsilon_list, self.alpha_list
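# --- Illustrative sketch (not part of the original code) ---
# The DP variant above relies on the legacy (pre-1.0) opacus API: a PrivacyEngine is
# built around the discriminator, attached to its optimizer, and queried for the
# privacy spent. A stripped-down sketch of that wiring under the same legacy opacus
# 0.x API; the toy model and numbers are placeholders and this is not verified
# against newer opacus releases.
import torch
from torch import nn, optim
import opacus

model = nn.Linear(16, 1)
optimizer = optim.Adam(model.parameters(), lr=2e-4, betas=(0.5, 0.9))

privacy_engine = opacus.PrivacyEngine(
    model,
    batch_size=64,
    sample_size=10_000,
    alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
    noise_multiplier=1.0,
    max_grad_norm=1.0,
)
privacy_engine.attach(optimizer)

# ... run some optimizer.step() calls as part of training, then ask how much
# privacy budget has been spent for a chosen delta:
target_delta = 1 / 10_000
epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent(target_delta)
print(epsilon, best_alpha)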
def train(self, train_data, categorical_columns=tuple(), epochs=-1,
          log_frequency=True, ordinal_columns=None, update_epsilon=None,
          verbose=False, mlflow=False):
    """Fit the CTGAN Synthesizer models to the training data.

    Args:
        train_data (numpy.ndarray or pandas.DataFrame):
            Training Data. It must be a 2-dimensional numpy array or a
            pandas.DataFrame.
        categorical_columns (list-like):
            List of discrete columns to be used to generate the Conditional
            Vector. If ``train_data`` is a Numpy array, this list should
            contain the integer indices of the columns. Otherwise, if it is
            a ``pandas.DataFrame``, this list should contain the column names.
        epochs (int):
            Number of training epochs. Defaults to the init param.
        log_frequency (boolean):
            Whether to use log frequency of categorical levels in conditional
            sampling. Defaults to ``True``.
    """
    if epochs == -1:
        epochs = self.epochs

    if not hasattr(self, "transformer"):
        self.transformer = DataTransformer()
        self.transformer.fit(train_data, categorical_columns)
    train_data = self.transformer.transform(train_data)

    data_sampler = Sampler(train_data, self.transformer.output_info)
    data_dim = self.transformer.output_dimensions

    if not hasattr(self, "cond_generator"):
        self.cond_generator = ConditionalGenerator(
            train_data, self.transformer.output_info, log_frequency)

    if not hasattr(self, "generator"):
        self.generator = Generator(
            self.embedding_dim + self.cond_generator.n_opt,
            self.gen_dim, data_dim).to(self.device)

    if not hasattr(self, "discriminator"):
        self.discriminator = Discriminator(
            data_dim + self.cond_generator.n_opt, self.dis_dim).to(self.device)

    if not hasattr(self, "optimizerG"):
        self.optimizerG = optim.Adam(self.generator.parameters(), lr=2e-4,
                                     betas=(0.5, 0.9), weight_decay=self.l2scale)

    if not hasattr(self, "optimizerD"):
        self.optimizerD = optim.Adam(self.discriminator.parameters(), lr=2e-4,
                                     betas=(0.5, 0.9))

    assert self.batch_size % 2 == 0
    mean = torch.zeros(self.batch_size, self.embedding_dim, device=self.device)
    std = mean + 1

    steps_per_epoch = max(len(train_data) // self.batch_size, 1)
    for i in range(epochs):
        self.trained_epoches += 1
        for id_ in range(steps_per_epoch):
            fakez = torch.normal(mean=mean, std=std)

            condvec = self.cond_generator.sample(self.batch_size)
            if condvec is None:
                c1, m1, col, opt = None, None, None, None
                real = data_sampler.sample(self.batch_size, col, opt)
            else:
                c1, m1, col, opt = condvec
                c1 = torch.from_numpy(c1).to(self.device)
                m1 = torch.from_numpy(m1).to(self.device)
                fakez = torch.cat([fakez, c1], dim=1)

                perm = np.arange(self.batch_size)
                np.random.shuffle(perm)
                real = data_sampler.sample(self.batch_size, col[perm], opt[perm])
                c2 = c1[perm]

            fake = self.generator(fakez)
            fakeact = self._apply_activate(fake)

            real = torch.from_numpy(real.astype('float32')).to(self.device)

            if c1 is not None:
                fake_cat = torch.cat([fakeact, c1], dim=1)
                real_cat = torch.cat([real, c2], dim=1)
            else:
                real_cat = real
                fake_cat = fake

            y_fake = self.discriminator(fake_cat)
            y_real = self.discriminator(real_cat)

            pen = self.discriminator.calc_gradient_penalty(
                real_cat, fake_cat, self.device)
            loss_d = -(torch.mean(y_real) - torch.mean(y_fake))

            self.optimizerD.zero_grad()
            pen.backward(retain_graph=True)
            loss_d.backward()
            self.optimizerD.step()

            fakez = torch.normal(mean=mean, std=std)
            condvec = self.cond_generator.sample(self.batch_size)

            if condvec is None:
                c1, m1, col, opt = None, None, None, None
            else:
                c1, m1, col, opt = condvec
                c1 = torch.from_numpy(c1).to(self.device)
                m1 = torch.from_numpy(m1).to(self.device)
                fakez = torch.cat([fakez, c1], dim=1)

            fake = self.generator(fakez)
            fakeact = self._apply_activate(fake)

            if c1 is not None:
                y_fake = self.discriminator(torch.cat([fakeact, c1], dim=1))
            else:
                y_fake = self.discriminator(fakeact)

            if condvec is None:
                cross_entropy = 0
            else:
                cross_entropy = self._cond_loss(fake, c1, m1)

            loss_g = -torch.mean(y_fake) + cross_entropy

            self.optimizerG.zero_grad()
            loss_g.backward()
            self.optimizerG.step()

        if verbose:
            if i % 50 == 0:
                print("Epoch %d, Loss G: %.4f, Loss D: %.4f" %
                      (self.trained_epoches - 1, loss_g.detach().cpu(),
                       loss_d.detach().cpu()),
                      flush=True)

        if mlflow:
            import mlflow
            mlflow.log_metric("loss_d", float(loss_d.detach().cpu()),
                              step=self.trained_epoches - 1)
            mlflow.log_metric("loss_g", float(loss_g.detach().cpu()),
                              step=self.trained_epoches - 1)
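# --- Illustrative sketch (not part of the original code) ---
# The hasattr() guards above implement "create on first call, reuse afterwards",
# which is what lets repeated train() calls resume with the same transformer,
# networks and optimizers instead of rebuilding them. A self-contained miniature
# of the same pattern:
class Trainer:
    def train(self, steps):
        if not hasattr(self, "state"):      # built only on the first call
            self.state = 0
            self.trained_epoches = 0
        for _ in range(steps):
            self.state += 1
        self.trained_epoches += 1
        return self.state

t = Trainer()
t.train(5)
print(t.train(5), t.trained_epoches)        # 10, 2 -- the second call resumed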
def fit(self, train_data, discrete_columns=(), epochs=300, log_frequency=True):
    """Fit the CTGAN Synthesizer models to the training data.

    Args:
        train_data (numpy.ndarray or pandas.DataFrame):
            Training Data. It must be a 2-dimensional numpy array or a
            pandas.DataFrame.
        discrete_columns (list-like):
            List of discrete columns to be used to generate the Conditional
            Vector. If ``train_data`` is a Numpy array, this list should
            contain the integer indices of the columns. Otherwise, if it is
            a ``pandas.DataFrame``, this list should contain the column names.
        epochs (int):
            Number of training epochs. Defaults to 300.
        log_frequency (boolean):
            Whether to use log frequency of categorical levels in conditional
            sampling. Defaults to ``True``.
    """
    self.transformer = DataTransformer()
    self.transformer.fit(train_data, discrete_columns)
    train_data = self.transformer.transform(train_data)

    data_sampler = Sampler(train_data, self.transformer.output_info)
    data_dim = self.transformer.output_dimensions

    self.cond_generator = ConditionalGenerator(
        train_data, self.transformer.output_info, log_frequency)

    self.generator = Generator(
        self.embedding_dim + self.cond_generator.n_opt,
        self.gen_dim, data_dim).to(self.device)
    discriminator = Discriminator(
        data_dim + self.cond_generator.n_opt, self.dis_dim).to(self.device)

    optimizerG = optim.Adam(self.generator.parameters(), lr=2e-4,
                            betas=(0.5, 0.9), weight_decay=self.l2scale)
    optimizerD = optim.Adam(discriminator.parameters(), lr=2e-4, betas=(0.5, 0.9))

    assert self.batch_size % 2 == 0
    mean = torch.zeros(self.batch_size, self.embedding_dim, device=self.device)
    std = mean + 1

    train_losses = []
    early_stopping = EarlyStopping(patience=self.patience, verbose=False)

    steps_per_epoch = max(len(train_data) // self.batch_size, 1)
    for i in range(epochs):
        for id_ in range(steps_per_epoch):
            fakez = torch.normal(mean=mean, std=std)

            condvec = self.cond_generator.sample(self.batch_size)
            if condvec is None:
                c1, m1, col, opt = None, None, None, None
                real = data_sampler.sample(self.batch_size, col, opt)
            else:
                c1, m1, col, opt = condvec
                c1 = torch.from_numpy(c1).to(self.device)
                m1 = torch.from_numpy(m1).to(self.device)
                fakez = torch.cat([fakez, c1], dim=1)

                perm = np.arange(self.batch_size)
                np.random.shuffle(perm)
                real = data_sampler.sample(self.batch_size, col[perm], opt[perm])
                c2 = c1[perm]

            fake = self.generator(fakez)
            fakeact = self._apply_activate(fake)

            real = torch.from_numpy(real.astype('float32')).to(self.device)

            if c1 is not None:
                fake_cat = torch.cat([fakeact, c1], dim=1)
                real_cat = torch.cat([real, c2], dim=1)
            else:
                real_cat = real
                fake_cat = fake

            y_fake = discriminator(fake_cat)
            y_real = discriminator(real_cat)

            pen = discriminator.calc_gradient_penalty(
                real_cat, fake_cat, self.device)
            loss_d = -(torch.mean(y_real) - torch.mean(y_fake))
            train_losses.append(loss_d.item())

            optimizerD.zero_grad()
            pen.backward(retain_graph=True)
            loss_d.backward()
            optimizerD.step()

            fakez = torch.normal(mean=mean, std=std)
            condvec = self.cond_generator.sample(self.batch_size)

            if condvec is None:
                c1, m1, col, opt = None, None, None, None
            else:
                c1, m1, col, opt = condvec
                c1 = torch.from_numpy(c1).to(self.device)
                m1 = torch.from_numpy(m1).to(self.device)
                fakez = torch.cat([fakez, c1], dim=1)

            fake = self.generator(fakez)
            fakeact = self._apply_activate(fake)

            if c1 is not None:
                y_fake = discriminator(torch.cat([fakeact, c1], dim=1))
            else:
                y_fake = discriminator(fakeact)

            if condvec is None:
                cross_entropy = 0
            else:
                cross_entropy = self._cond_loss(fake, c1, m1)

            loss_g = -torch.mean(y_fake) + cross_entropy
            train_losses.append(loss_g.item())

            optimizerG.zero_grad()
            loss_g.backward()
            optimizerG.step()

        early_stopping(np.average(train_losses))
        if early_stopping.early_stop:
            print("GAN: Early stopping after epoch {}".format(i))
            break
        train_losses = []
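# --- Illustrative sketch (not part of the original code) ---
# The EarlyStopping helper used above is not shown in this file. A minimal assumed
# implementation that matches how it is called here (the instance is called with a
# running-average loss and exposes an `early_stop` flag after `patience`
# non-improving calls); the real helper may differ in detail.
class EarlyStopping:
    def __init__(self, patience=10, verbose=False, delta=0.0):
        self.patience = patience
        self.verbose = verbose
        self.delta = delta
        self.best_loss = None
        self.counter = 0
        self.early_stop = False

    def __call__(self, loss):
        if self.best_loss is None or loss < self.best_loss - self.delta:
            self.best_loss = loss           # improvement: reset the counter
            self.counter = 0
        else:
            self.counter += 1               # no improvement this epoch
            if self.verbose:
                print(f"EarlyStopping counter: {self.counter}/{self.patience}")
            if self.counter >= self.patience:
                self.early_stop = True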
def fit(
    self,
    train_data,
    discrete_columns=tuple(),
    epochs=300,
    verbose=True,
    gen_lr=2e-4,
):
    """Fit the CTGAN Synthesizer models to the training data.

    Args:
        train_data (numpy.ndarray or pandas.DataFrame):
            Training Data. It must be a 2-dimensional numpy array or a
            pandas.DataFrame.
        discrete_columns (list-like):
            List of discrete columns to be used to generate the Conditional
            Vector. If ``train_data`` is a Numpy array, this list should
            contain the integer indices of the columns. Otherwise, if it is
            a ``pandas.DataFrame``, this list should contain the column names.
        epochs (int):
            Number of training epochs. Defaults to 300.
    """
    """
    self.confidence_level = confidence_level
    loss_other_name = "loss_bb" if confidence_level != -1 else "loss_d"
    history = {"loss_g": [], loss_other_name: []}
    """
    # Eli: add Mode-specific Normalization
    if not hasattr(self, "transformer"):
        self.transformer = DataTransformer()
        self.transformer.fit(train_data, discrete_columns)
    train_data = self.transformer.transform(train_data)

    data_sampler = Sampler(train_data, self.transformer.output_info)
    data_dim = self.transformer.output_dimensions

    if not hasattr(self, "cond_generator"):
        self.cond_generator = ConditionalGenerator(
            train_data, self.transformer.output_info, self.log_frequency)

    if not hasattr(self, "generator"):
        self.generator = Generator(
            self.embedding_dim + self.cond_generator.n_opt,
            self.gen_dim, data_dim).to(self.device)

    # print(data_dim)
    # print(self.cond_generator.n_opt)
    if not hasattr(self, "discriminator"):
        self.discriminator = Discriminator(
            data_dim + self.cond_generator.n_opt, self.dis_dim).to(self.device)

    """
    # After sample() in fit, gen_output is 120 not 80 (because the generator runs almost twice).
    if not hasattr(self, "discriminator"):
        self.discriminator = Discriminator(
            24 + self.cond_generator.n_opt, self.dis_dim
        ).to(self.device)
    """

    if not hasattr(self, "optimizerG"):
        self.optimizerG = optim.Adam(
            self.generator.parameters(),
            lr=gen_lr,
            betas=(0.5, 0.9),
            weight_decay=self.l2scale,
        )

    if not hasattr(self, "optimizerD"):
        self.optimizerD = optim.Adam(self.discriminator.parameters(),
                                     lr=2e-4, betas=(0.5, 0.9))

    assert self.batch_size % 2 == 0

    # Init mean to zero and std to one.
    # Keep one spot for the confidence level, which is appended after the normal draw.
    mean = torch.zeros((self.batch_size * self.embedding_dim) - 1, device=self.device)
    std = mean + 1

    # steps_per_epoch = max(len(train_data) // self.batch_size, 1)
    steps_per_epoch = 10  # magic number decided with Gilad; feel free to change it

    loss_other_name = "loss_bb" if self.confidence_levels != [] else "loss_d"
    allhist = {"confidence_levels_history": []}

    # Eli: start training loop
    for current_conf_level in self.confidence_levels:
        # TODO: if no confidence levels are given (i.e. []), this loop never runs,
        # so the no-confidence case needs to be handled outside this loop.
        history = {
            "confidence_level": current_conf_level,
            "loss_g": [],
            loss_other_name: []
        }
        for i in range(epochs):
            self.trained_epoches += 1
            for id_ in range(steps_per_epoch):
                # We only examine confidence levels here, so the parts that do not
                # apply (no confidence level, discriminator training) are disabled.
                """
                if self.confidence_levels == []:
                    # discriminator loop
                    for n in range(self.discriminator_steps):
                        fakez = torch.normal(mean=mean, std=std)
                        condvec = self.cond_generator.sample(self.batch_size)
                        if condvec is None:
                            c1, m1, col, opt = None, None, None, None
                            real = data_sampler.sample(self.batch_size, col, opt)
                        else:
                            c1, m1, col, opt = condvec
                            c1 = torch.from_numpy(c1).to(self.device)
                            m1 = torch.from_numpy(m1).to(self.device)
                            fakez = torch.cat([fakez, c1], dim=1)

                            perm = np.arange(self.batch_size)
                            np.random.shuffle(perm)
                            real = data_sampler.sample(
                                self.batch_size, col[perm], opt[perm]
                            )
                            c2 = c1[perm]

                        fake = self.generator(fakez)
                        fakeact = self._apply_activate(fake)

                        real = torch.from_numpy(real.astype("float32")).to(self.device)

                        if c1 is not None:
                            fake_cat = torch.cat([fakeact, c1], dim=1)
                            real_cat = torch.cat([real, c2], dim=1)
                        else:
                            real_cat = real
                            fake_cat = fake

                        y_fake = self.discriminator(fake_cat)
                        y_real = self.discriminator(real_cat)

                        pen = self.discriminator.calc_gradient_penalty(
                            real_cat, fake_cat, self.device
                        )
                        loss_d = -(torch.mean(y_real) - torch.mean(y_fake))

                        if self.confidence_levels == []:  # without bb loss
                            self.optimizerD.zero_grad()
                            pen.backward(retain_graph=True)
                            loss_d.backward()
                            self.optimizerD.step()
                """
                fakez = torch.normal(mean=mean, std=std)

                # Append the confidence level to the noise sample.
                # Not sure why this is needed, but debugging shows these lines are
                # exercised; ask Gilad. (Eli)
                confi = current_conf_level.astype(np.float32)  # the generator expects float
                conf = torch.tensor([confi]).to(self.device)   # confidence input to be appended
                fakez = torch.cat([fakez, conf], dim=0)
                fakez = torch.reshape(fakez, (self.batch_size, self.embedding_dim))
                # End of the part that appends the confidence level to the sample.

                condvec = self.cond_generator.sample(self.batch_size)
                if condvec is None:
                    c1, m1, col, opt = None, None, None, None
                else:
                    c1, m1, col, opt = condvec
                    c1 = torch.from_numpy(c1).to(self.device)
                    m1 = torch.from_numpy(m1).to(self.device)
                    fakez = torch.cat([fakez, c1], dim=1)

                fake = self.generator(fakez)
                fakeact = self._apply_activate(fake)

                if c1 is not None:
                    y_fake = self.discriminator(torch.cat([fakeact, c1], dim=1))
                else:
                    y_fake = self.discriminator(fakeact)

                if condvec is None:
                    cross_entropy = 0
                else:
                    cross_entropy = self._cond_loss(fake, c1, m1)

                if self.confidence_levels != []:
                    # Generate `batch_size` samples, feeding the black-box output
                    # (yBB) into the input; this applies the generator twice.
                    gen_out, gen_fakeacts = self.sample(self.batch_size,
                                                        current_conf_level)
                    """
                    gen_out = fakeact.detach().cpu().numpy()
                    gen_out = self.transformer.inverse_transform(gen_out, None)
                    """
                    # Send the specific confidence level to the loss computation;
                    # it returns the confidence bit-vector input and the bb_y_vec input bit.
                    loss_bb = self._calc_bb_confidence_loss(gen_out, current_conf_level)

                    # Send through the discriminator to reconnect the gradient.
                    y_fake = self.discriminator(gen_fakeacts)

                    # Take the mean as in the original CTGAN and multiply by a tiny
                    # factor to get a really small value.
                    y_fake_mean = torch.mean(y_fake) * 0.00000000000001
                    y_fake_mean = y_fake_mean.to(self.device)

                    # Cast to float because y_fake is float, not double.
                    loss_bb = loss_bb.float()

                    # Add y_fake_mean to loss_g as in the original CTGAN, plus loss_bb.
                    loss_g = -y_fake_mean + loss_bb + cross_entropy
                    # loss_g = -y_fake_mean + cross_entropy
                else:
                    # original loss
                    loss_g = -torch.mean(y_fake) + cross_entropy

                self.optimizerG.zero_grad()
                loss_g.backward()
                """
                print("gradients\n")
                for p in self.generator.parameters():
                    print(p.grad)
                """
                """
                for name, param in self.generator.named_parameters():
                    # if param.requires_grad:
                    print(name)
                    print(param.grad)
                """
                self.optimizerG.step()

            loss_g_val = loss_g.detach().cpu()
            loss_other_val = locals()[loss_other_name].detach().cpu()
            history["loss_g"].append(loss_g.item())
            history[loss_other_name].append(loss_other_val.item())
            if verbose:
                print(
                    f"Epoch {self.trained_epoches}, Loss G: {loss_g_val}, "
                    f"{loss_other_name}: {loss_other_val}",
                    flush=True,
                )

        allhist["confidence_levels_history"].append(history)

    return allhist
def train(self, data, categorical_columns=None, ordinal_columns=None,
          update_epsilon=None, verbose=False, mlflow=False):
    if update_epsilon:
        self.epsilon = update_epsilon

    if not hasattr(self, "transformer"):
        self.transformer = DataTransformer()
        self.transformer.fit(data, discrete_columns=categorical_columns)
    data = self.transformer.transform(data)
    data_dim = self.transformer.output_dimensions

    if not hasattr(self, "cond_generator"):
        self.cond_generator = ConditionalGenerator(
            data, self.transformer.output_info, self.log_frequency)

    if not hasattr(self, "generator"):
        self.generator = Generator(
            self.embedding_dim + self.cond_generator.n_opt,
            self.gen_dim, data_dim).to(self.device)

    if not hasattr(self, "discriminator"):
        self.discriminator = Discriminator(
            data_dim + self.cond_generator.n_opt,
            self.dis_dim, self.loss, self.pack).to(self.device)

    if not hasattr(self, "student_disc"):
        self.student_disc = self.discriminator
        self.student_disc.apply(weights_init)

    if not hasattr(self, "teacher_disc"):
        # this is to make sure each teacher has some samples
        sample_per_teacher = (self.sample_per_teacher
                              if self.sample_per_teacher < len(data) else 1000)
        self.num_teachers = int(len(data) / sample_per_teacher) + 1
        self.data_partitions = np.array_split(data, self.num_teachers)

        # create conditional generator for each teacher model
        self.cond_generator_t = [
            ConditionalGenerator(d, self.transformer.output_info, self.log_frequency)
            for d in self.data_partitions
        ]

        self.teacher_disc = [self.discriminator for i in range(self.num_teachers)]
        for i in range(self.num_teachers):
            self.teacher_disc[i].apply(weights_init)

    if not hasattr(self, "optimizerG"):
        self.optimizerG = optim.Adam(self.generator.parameters(), lr=2e-4,
                                     betas=(0.5, 0.9), weight_decay=self.l2scale)

    if not hasattr(self, "optimizerS"):
        self.optimizerS = optim.Adam(self.student_disc.parameters(), lr=2e-4,
                                     betas=(0.5, 0.9))

    if not hasattr(self, "optimizerT"):
        self.optimizerT = [
            optim.Adam(self.teacher_disc[i].parameters(), lr=2e-4, betas=(0.5, 0.9))
            for i in range(self.num_teachers)
        ]

    if not hasattr(self, "train_eps"):
        self.alphas = torch.tensor([0.0 for i in range(self.moments_order)],
                                   device=self.device)
        self.l_list = 1 + torch.tensor(range(self.moments_order), device=self.device)
        self.train_eps = 0

    mean = torch.zeros(self.batch_size, self.embedding_dim, device=self.device)
    std = mean + 1

    one = torch.tensor(1, dtype=torch.float).to(self.device)
    mone = one * -1

    REAL_LABEL = 1
    FAKE_LABEL = 0
    criterion = nn.BCELoss() if (self.loss == "cross_entropy") else self.WLoss

    if verbose:
        print("using loss {} and regularization {}".format(
            self.loss, self.regularization))

    epoch = 0
    while self.train_eps < self.epsilon:
        # train teacher discriminators
        for t_2 in range(self.teacher_iters):
            for i in range(self.num_teachers):
                partition_data = self.data_partitions[i]
                data_sampler = Sampler(partition_data, self.transformer.output_info)
                fakez = torch.normal(mean, std=std).to(self.device)

                condvec = self.cond_generator_t[i].sample(self.batch_size)
                if condvec is None:
                    c1, m1, col, opt = None, None, None, None
                    real = data_sampler.sample(self.batch_size, col, opt)
                else:
                    c1, m1, col, opt = condvec
                    c1 = torch.from_numpy(c1).to(self.device)
                    m1 = torch.from_numpy(m1).to(self.device)
                    fakez = torch.cat([fakez, c1], dim=1)

                    perm = np.arange(self.batch_size)
                    np.random.shuffle(perm)
                    real = data_sampler.sample(self.batch_size, col[perm], opt[perm])
                    c2 = c1[perm]

                fake = self.generator(fakez)
                fakeact = self._apply_activate(fake)

                real = torch.from_numpy(real.astype('float32')).to(self.device)

                if c1 is not None:
                    fake_cat = torch.cat([fakeact, c1], dim=1)
                    real_cat = torch.cat([real, c2], dim=1)
                else:
                    real_cat = real
                    fake_cat = fake

                self.optimizerT[i].zero_grad()

                y_all = torch.cat([
                    self.teacher_disc[i](fake_cat),
                    self.teacher_disc[i](real_cat)
                ])
                label_fake = torch.full(
                    (int(self.batch_size / self.pack), 1),
                    FAKE_LABEL, dtype=torch.float, device=self.device)
                label_true = torch.full(
                    (int(self.batch_size / self.pack), 1),
                    REAL_LABEL, dtype=torch.float, device=self.device)
                labels = torch.cat([label_fake, label_true])

                errD = criterion(y_all, labels)
                errD.backward()

                if self.regularization == 'dragan':
                    pen = self.teacher_disc[i].dragan_penalty(real_cat,
                                                              device=self.device)
                    pen.backward(retain_graph=True)

                self.optimizerT[i].step()

        # train student discriminator
        for t_3 in range(self.student_iters):
            data_sampler = Sampler(data, self.transformer.output_info)
            fakez = torch.normal(mean, std=std).to(self.device)

            condvec = self.cond_generator.sample(self.batch_size)
            if condvec is None:
                c1, m1, col, opt = None, None, None, None
                real = data_sampler.sample(self.batch_size, col, opt)
            else:
                c1, m1, col, opt = condvec
                c1 = torch.from_numpy(c1).to(self.device)
                m1 = torch.from_numpy(m1).to(self.device)
                fakez = torch.cat([fakez, c1], dim=1)

                perm = np.arange(self.batch_size)
                np.random.shuffle(perm)
                real = data_sampler.sample(self.batch_size, col[perm], opt[perm])
                c2 = c1[perm]

            fake = self.generator(fakez)
            fakeact = self._apply_activate(fake)

            if c1 is not None:
                fake_cat = torch.cat([fakeact, c1], dim=1)
            else:
                fake_cat = fake

            fake_data = fake_cat
            predictions, votes = pate(fake_data, self.teacher_disc,
                                      self.noise_multiplier, self.device)
            output = self.student_disc(fake_data.detach())

            # update moments accountant
            self.alphas = self.alphas + moments_acc(
                self.num_teachers, votes, self.noise_multiplier,
                self.l_list, self.device)

            loss_s = criterion(output, predictions.float().to(self.device))

            self.optimizerS.zero_grad()
            loss_s.backward()

            if self.regularization == 'dragan':
                vals = torch.cat(
                    [predictions.float().to(self.device), fake_data], axis=1)
                ordered = vals[vals[:, 0].sort()[1]]
                data_list = torch.split(
                    ordered, predictions.shape[0] - int(predictions.sum().item()))
                synth_cat = torch.cat(data_list[1:], axis=0)[:, 1:]
                pen = self.student_disc.dragan_penalty(synth_cat, device=self.device)
                pen.backward(retain_graph=True)

            self.optimizerS.step()

        self.train_eps = min((self.alphas - math.log(self.delta)) / self.l_list)

        # train generator
        fakez = torch.normal(mean=mean, std=std).to(self.device)
        condvec = self.cond_generator.sample(self.batch_size)

        if condvec is None:
            c1, m1, col, opt = None, None, None, None
        else:
            c1, m1, col, opt = condvec
            c1 = torch.from_numpy(c1).to(self.device)
            m1 = torch.from_numpy(m1).to(self.device)
            fakez = torch.cat([fakez, c1], dim=1)

        fake = self.generator(fakez)
        fakeact = self._apply_activate(fake)

        if c1 is not None:
            y_fake = self.student_disc(torch.cat([fakeact, c1], dim=1))
        else:
            y_fake = self.student_disc(fakeact)

        if condvec is None:
            cross_entropy = 0
        else:
            cross_entropy = self._cond_loss(fake, c1, m1)

        if self.loss == 'cross_entropy':
            label_g = torch.full((int(self.batch_size / self.pack), 1),
                                 REAL_LABEL, dtype=torch.float, device=self.device)
            loss_g = criterion(y_fake, label_g.float())
            loss_g = loss_g + cross_entropy
        else:
            loss_g = -torch.mean(y_fake) + cross_entropy

        self.optimizerG.zero_grad()
        loss_g.backward()
        self.optimizerG.step()

        if verbose:
            print('eps: {:f} \t G: {:f} \t D: {:f}'.format(
                self.train_eps, loss_g.detach().cpu(), loss_s.detach().cpu()))

        if mlflow:
            import mlflow
            mlflow.log_metric("loss_g", float(loss_g.detach().cpu()), step=epoch)
            mlflow.log_metric("loss_d", float(loss_s.detach().cpu()), step=epoch)
            mlflow.log_metric("epsilon", float(self.train_eps), step=epoch)

        epoch += 1
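# --- Illustrative sketch (not part of the original code) ---
# The mlflow=True branch above logs per-epoch metrics with mlflow.log_metric().
# Those calls are typically wrapped in an explicit MLflow run so the metrics land
# in one place. A minimal, self-contained way to provide such a run; the run name
# and the stand-in loop below are placeholders, while the metric names mirror the
# ones logged above.
import mlflow

with mlflow.start_run(run_name="dp-ctgan-training"):
    for epoch in range(3):                       # stand-in for the training loop
        loss_g, loss_d, train_eps = 0.7, 0.5, 0.1 * (epoch + 1)
        mlflow.log_metric("loss_g", loss_g, step=epoch)
        mlflow.log_metric("loss_d", loss_d, step=epoch)
        mlflow.log_metric("epsilon", train_eps, step=epoch)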
def fit(
    self,
    train_data,
    discrete_columns=tuple(),
    epochs=300,
    confidence_level=-1,
    verbose=True,
    gen_lr=2e-4,
):
    """Fit the CTGAN Synthesizer models to the training data.

    Args:
        train_data (numpy.ndarray or pandas.DataFrame):
            Training Data. It must be a 2-dimensional numpy array or a
            pandas.DataFrame.
        discrete_columns (list-like):
            List of discrete columns to be used to generate the Conditional
            Vector. If ``train_data`` is a Numpy array, this list should
            contain the integer indices of the columns. Otherwise, if it is
            a ``pandas.DataFrame``, this list should contain the column names.
        epochs (int):
            Number of training epochs. Defaults to 300.
    """
    self.confidence_level = confidence_level
    loss_other_name = "loss_bb" if confidence_level != -1 else "loss_d"
    history = {"loss_g": [], loss_other_name: []}

    # Eli: add Mode-specific Normalization
    if not hasattr(self, "transformer"):
        self.transformer = DataTransformer()
        self.transformer.fit(train_data, discrete_columns)
    train_data = self.transformer.transform(train_data)

    data_sampler = Sampler(train_data, self.transformer.output_info)
    data_dim = self.transformer.output_dimensions

    if not hasattr(self, "cond_generator"):
        self.cond_generator = ConditionalGenerator(
            train_data, self.transformer.output_info, self.log_frequency)

    if not hasattr(self, "generator"):
        self.generator = Generator(
            self.embedding_dim + self.cond_generator.n_opt,
            self.gen_dim, data_dim).to(self.device)

    if not hasattr(self, "discriminator"):
        self.discriminator = Discriminator(
            data_dim + self.cond_generator.n_opt, self.dis_dim).to(self.device)

    if not hasattr(self, "optimizerG"):
        self.optimizerG = optim.Adam(
            self.generator.parameters(),
            lr=gen_lr,
            betas=(0.5, 0.9),
            weight_decay=self.l2scale,
        )

    if not hasattr(self, "optimizerD"):
        self.optimizerD = optim.Adam(self.discriminator.parameters(),
                                     lr=2e-4, betas=(0.5, 0.9))

    assert self.batch_size % 2 == 0

    # init mean to zero and std to one
    mean = torch.zeros(self.batch_size, self.embedding_dim, device=self.device)
    std = mean + 1

    # steps_per_epoch = max(len(train_data) // self.batch_size, 1)
    steps_per_epoch = 10  # magic number decided with Gilad; feel free to change it

    # Eli: start training loop
    for i in range(epochs):
        self.trained_epoches += 1
        for id_ in range(steps_per_epoch):
            if self.confidence_level == -1:
                # discriminator loop
                for n in range(self.discriminator_steps):
                    fakez = torch.normal(mean=mean, std=std)

                    condvec = self.cond_generator.sample(self.batch_size)
                    if condvec is None:
                        c1, m1, col, opt = None, None, None, None
                        real = data_sampler.sample(self.batch_size, col, opt)
                    else:
                        c1, m1, col, opt = condvec
                        c1 = torch.from_numpy(c1).to(self.device)
                        m1 = torch.from_numpy(m1).to(self.device)
                        fakez = torch.cat([fakez, c1], dim=1)

                        perm = np.arange(self.batch_size)
                        np.random.shuffle(perm)
                        real = data_sampler.sample(self.batch_size,
                                                   col[perm], opt[perm])
                        c2 = c1[perm]

                    fake = self.generator(fakez)
                    fakeact = self._apply_activate(fake)

                    real = torch.from_numpy(real.astype("float32")).to(self.device)

                    if c1 is not None:
                        fake_cat = torch.cat([fakeact, c1], dim=1)
                        real_cat = torch.cat([real, c2], dim=1)
                    else:
                        real_cat = real
                        fake_cat = fake

                    y_fake = self.discriminator(fake_cat)
                    y_real = self.discriminator(real_cat)

                    pen = self.discriminator.calc_gradient_penalty(
                        real_cat, fake_cat, self.device)
                    loss_d = -(torch.mean(y_real) - torch.mean(y_fake))

                    if self.confidence_level == -1:  # without bb loss
                        self.optimizerD.zero_grad()
                        pen.backward(retain_graph=True)
                        loss_d.backward()
                        self.optimizerD.step()

            fakez = torch.normal(mean=mean, std=std)
            condvec = self.cond_generator.sample(self.batch_size)

            if condvec is None:
                c1, m1, col, opt = None, None, None, None
            else:
                c1, m1, col, opt = condvec
                c1 = torch.from_numpy(c1).to(self.device)
                m1 = torch.from_numpy(m1).to(self.device)
                fakez = torch.cat([fakez, c1], dim=1)

            fake = self.generator(fakez)
            fakeact = self._apply_activate(fake)

            if c1 is not None:
                y_fake = self.discriminator(torch.cat([fakeact, c1], dim=1))
            else:
                y_fake = self.discriminator(fakeact)

            if condvec is None:
                cross_entropy = 0
            else:
                cross_entropy = self._cond_loss(fake, c1, m1)

            if self.confidence_level != -1:
                # generate `batch_size` samples
                gen_out = self.sample(self.batch_size)
                loss_bb = self._calc_bb_confidence_loss(gen_out)
                loss_g = loss_bb + cross_entropy
            else:
                # original loss
                loss_g = -torch.mean(y_fake) + cross_entropy

            self.optimizerG.zero_grad()
            loss_g.backward()
            self.optimizerG.step()

        loss_g_val = loss_g.detach().cpu()
        loss_other_val = locals()[loss_other_name].detach().cpu()
        history["loss_g"].append(loss_g.item())
        history[loss_other_name].append(loss_other_val.item())
        if verbose:
            print(
                f"Epoch {self.trained_epoches}, Loss G: {loss_g_val}, "
                f"{loss_other_name}: {loss_other_val}",
                flush=True,
            )

    return history
def train(self, data, categorical_columns=None, ordinal_columns=None,
          update_epsilon=None):
    if update_epsilon:
        self.epsilon = update_epsilon

    sample_per_teacher = (self.sample_per_teacher
                          if self.sample_per_teacher < len(data) else 1000)
    self.num_teachers = int(len(data) / sample_per_teacher) + 1

    self.transformer = DataTransformer()
    self.transformer.fit(data, discrete_columns=categorical_columns)
    data = self.transformer.transform(data)
    data_partitions = np.array_split(data, self.num_teachers)

    data_dim = self.transformer.output_dimensions
    self.cond_generator = ConditionalGenerator(
        data, self.transformer.output_info, self.log_frequency)

    # create conditional generator for each teacher model
    cond_generator = [
        ConditionalGenerator(d, self.transformer.output_info, self.log_frequency)
        for d in data_partitions
    ]

    self.generator = Generator(
        self.embedding_dim + self.cond_generator.n_opt,
        self.gen_dim, data_dim).to(self.device)

    discriminator = Discriminator(
        data_dim + self.cond_generator.n_opt,
        self.dis_dim, self.loss, self.pack).to(self.device)

    student_disc = discriminator
    student_disc.apply(weights_init)

    teacher_disc = [discriminator for i in range(self.num_teachers)]
    for i in range(self.num_teachers):
        teacher_disc[i].apply(weights_init)

    optimizer_g = optim.Adam(self.generator.parameters(), lr=2e-4,
                             betas=(0.5, 0.9), weight_decay=self.l2scale)
    optimizer_s = optim.Adam(student_disc.parameters(), lr=2e-4, betas=(0.5, 0.9))
    optimizer_t = [
        optim.Adam(teacher_disc[i].parameters(), lr=2e-4, betas=(0.5, 0.9))
        for i in range(self.num_teachers)
    ]

    noise_multiplier = self.noise_multiplier
    alphas = torch.tensor([0.0 for i in range(self.moments_order)],
                          device=self.device)
    l_list = 1 + torch.tensor(range(self.moments_order), device=self.device)
    eps = 0

    mean = torch.zeros(self.batch_size, self.embedding_dim, device=self.device)
    std = mean + 1

    real_label = 1
    fake_label = 0

    criterion = nn.BCELoss() if (self.loss == "cross_entropy") else self.w_loss

    if self.verbose:
        print("using loss {} and regularization {}".format(
            self.loss, self.regularization))

    while eps < self.epsilon:
        # train teacher discriminators
        for t_2 in range(self.teacher_iters):
            for i in range(self.num_teachers):
                partition_data = data_partitions[i]
                data_sampler = Sampler(partition_data, self.transformer.output_info)
                fakez = torch.normal(mean, std=std).to(self.device)

                condvec = cond_generator[i].sample(self.batch_size)
                if condvec is None:
                    c1, m1, col, opt = None, None, None, None
                    real = data_sampler.sample(self.batch_size, col, opt)
                else:
                    c1, m1, col, opt = condvec
                    c1 = torch.from_numpy(c1).to(self.device)
                    m1 = torch.from_numpy(m1).to(self.device)
                    fakez = torch.cat([fakez, c1], dim=1)

                    perm = np.arange(self.batch_size)
                    np.random.shuffle(perm)
                    real = data_sampler.sample(self.batch_size, col[perm], opt[perm])
                    c2 = c1[perm]

                fake = self.generator(fakez)
                fakeact = self._apply_activate(fake)

                real = torch.from_numpy(real.astype("float32")).to(self.device)

                if c1 is not None:
                    fake_cat = torch.cat([fakeact, c1], dim=1)
                    real_cat = torch.cat([real, c2], dim=1)
                else:
                    real_cat = real
                    fake_cat = fake

                optimizer_t[i].zero_grad()

                y_all = torch.cat(
                    [teacher_disc[i](fake_cat), teacher_disc[i](real_cat)])
                label_fake = torch.full(
                    (int(self.batch_size / self.pack), 1),
                    fake_label,
                    dtype=torch.float,
                    device=self.device,
                )
                label_true = torch.full(
                    (int(self.batch_size / self.pack), 1),
                    real_label,
                    dtype=torch.float,
                    device=self.device,
                )
                labels = torch.cat([label_fake, label_true])

                error_d = criterion(y_all, labels)
                error_d.backward()

                if self.regularization == "dragan":
                    pen = teacher_disc[i].dragan_penalty(real_cat,
                                                         device=self.device)
                    pen.backward(retain_graph=True)

                optimizer_t[i].step()

        # train student discriminator
        for t_3 in range(self.student_iters):
            data_sampler = Sampler(data, self.transformer.output_info)
            fakez = torch.normal(mean, std=std)

            condvec = self.cond_generator.sample(self.batch_size)
            if condvec is None:
                c1, m1, col, opt = None, None, None, None
                real = data_sampler.sample(self.batch_size, col, opt)
            else:
                c1, m1, col, opt = condvec
                c1 = torch.from_numpy(c1).to(self.device)
                m1 = torch.from_numpy(m1).to(self.device)
                fakez = torch.cat([fakez, c1], dim=1)

                perm = np.arange(self.batch_size)
                np.random.shuffle(perm)
                real = data_sampler.sample(self.batch_size, col[perm], opt[perm])
                c2 = c1[perm]

            fake = self.generator(fakez)
            fakeact = self._apply_activate(fake)

            if c1 is not None:
                fake_cat = torch.cat([fakeact, c1], dim=1)
            else:
                fake_cat = fake

            fake_data = fake_cat
            predictions, votes = pate(fake_data, teacher_disc, noise_multiplier,
                                      device=self.device)
            output = student_disc(fake_data.detach())

            # update moments accountant
            alphas = alphas + moments_acc(self.num_teachers, votes,
                                          noise_multiplier, l_list,
                                          device=self.device)

            loss_s = criterion(output, predictions.float().to(self.device))

            optimizer_s.zero_grad()
            loss_s.backward()

            if self.regularization == "dragan":
                vals = torch.cat([predictions, fake_data], axis=1)
                ordered = vals[vals[:, 0].sort()[1]]
                data_list = torch.split(
                    ordered, predictions.shape[0] - int(predictions.sum().item()))
                synth_cat = torch.cat(data_list[1:], axis=0)[:, 1:]
                pen = student_disc.dragan_penalty(synth_cat, device=self.device)
                pen.backward(retain_graph=True)

            optimizer_s.step()

            # print('iterator {i}, student discriminator loss is {j}'.format(i=t_3, j=loss_s))

        # train generator
        fakez = torch.normal(mean=mean, std=std)
        condvec = self.cond_generator.sample(self.batch_size)

        if condvec is None:
            c1, m1, col, opt = None, None, None, None
        else:
            c1, m1, col, opt = condvec
            c1 = torch.from_numpy(c1).to(self.device)
            m1 = torch.from_numpy(m1).to(self.device)
            fakez = torch.cat([fakez, c1], dim=1)

        fake = self.generator(fakez)
        fakeact = self._apply_activate(fake)

        if c1 is not None:
            y_fake = student_disc(torch.cat([fakeact, c1], dim=1))
        else:
            y_fake = student_disc(fakeact)

        if condvec is None:
            cross_entropy = 0
        else:
            cross_entropy = self._cond_loss(fake, c1, m1)

        if self.loss == "cross_entropy":
            label_g = torch.full(
                (int(self.batch_size / self.pack), 1),
                real_label,
                dtype=torch.float,
                device=self.device,
            )
            loss_g = criterion(y_fake, label_g.float())
            loss_g = loss_g + cross_entropy
        else:
            loss_g = -torch.mean(y_fake) + cross_entropy

        optimizer_g.zero_grad()
        loss_g.backward()
        optimizer_g.step()

        eps = min((alphas - math.log(self.delta)) / l_list)

        if self.verbose:
            print("eps: {:f} \t G: {:f} \t D: {:f}".format(
                eps, loss_g.detach().cpu(), loss_s.detach().cpu()))
def test_call_model(self):
    tf.random.set_seed(0)
    inputs = tf.random.uniform(
        [self._vars['batch_size'], self._vars['input_dim']])

    generator = Generator(self._vars['input_dim'], self._vars['layer_dims'],
                          self._vars['output_dim'], self._output_tensor,
                          self._vars['tau'])
    generator.build((self._vars['batch_size'], self._vars['input_dim']))
    outputs, outputs_act = generator(inputs)

    expected_outputs = tf.constant(
        [[-0.09601411, -0.05729139, 0.19020572, -0.1797896, 0.02286813,
          0.07439799, 0.12995596, 0.0488571, -0.0749413, 0.10059232],
         [-0.18619692, -0.06627501, 0.1862518, -0.2113035, 0.01581551,
          -0.01421695, 0.09985986, 0.03720004, -0.01467223, 0.06440569],
         [-0.13894776, -0.12020721, 0.14429338, -0.09382746, -0.0797215,
          0.04900312, 0.06119634, 0.02027673, -0.18325481, 0.03599714],
         [-0.05591032, -0.07698131, 0.20061807, -0.20924883, 0.03713575,
          0.04930973, 0.10697968, 0.04430075, -0.06608526, 0.13931122],
         [-0.16535681, -0.03096866, 0.2077156, -0.05521879, -0.01992304,
          0.0924558, 0.01926909, 0.12916182, -0.06599644, 0.04315265],
         [-0.2171669, 0.03691592, 0.10808319, -0.09851374, -0.05920613,
          -0.05774612, 0.12822376, 0.02041179, 0.02793016, -0.0140909],
         [-0.23017089, -0.04269604, 0.1598379, -0.08609627, 0.01311586,
          -0.00599252, -0.01298555, 0.03262053, -0.07506532, 0.05415451],
         [-0.19203907, -0.05534588, 0.15888636, -0.16571346, -0.0952314,
          -0.12779173, 0.1111488, 0.02334097, 0.07275349, 0.03363734],
         [-0.21607769, -0.05887552, 0.1601755, -0.17244826, -0.05100354,
          -0.07022924, 0.09414856, 0.03434329, 0.02077717, -0.0324632],
         [-0.16389519, -0.01955811, 0.1468435, -0.22203475, 0.03108735,
          -0.06563827, 0.03660776, 0.05814479, 0.00825485, 0.00584181]],
        dtype=tf.float32)

    expected_outputs_act = tf.constant(
        [[-0.09572015, -0.05722879, 0.18794465, -0.17787711, 0.02286414,
          0.07426102, 0.12922926, 0.04881826, -0.07480131, 0.10025439],
         [-0.18407457, -0.06617814, 0.1841276, -0.2082138, 0.01581419,
          -0.01421599, 0.09952924, 0.03718289, -0.01467118, 0.06431677],
         [-0.1380604, -0.11963154, 0.14330022, -0.09355307, -0.07955302,
          0.04896393, 0.06112006, 0.02027395, -0.1812306, 0.0359816],
         [-0.05585213, -0.07682958, 0.19796923, -0.20624737, 0.03711868,
          0.0492698, 0.10657341, 0.04427179, -0.06598922, 0.13841692],
         [-0.163866, -0.03095876, 0.2047789, -0.05516273, -0.0199204,
          0.09219324, 0.0192667, 0.12844831, -0.06590078, 0.04312588],
         [-0.2138161, 0.03689916, 0.10766426, -0.09819627, -0.05913705,
          -0.05768201, 0.12752563, 0.02040895, 0.0279229, -0.01408997],
         [-0.22619049, -0.04267012, 0.15849046, -0.08588416, 0.0131151,
          -0.00599245, -0.01298482, 0.03260897, -0.07492464, 0.05410162],
         [-0.18971263, -0.05528943, 0.15756269, -0.16421305, -0.09494455,
          -0.12710059, 0.11069333, 0.02333673, 0.0726254, 0.03362466],
         [-0.21277645, -0.05880757, 0.15881957, -0.1707589, -0.05095936,
          -0.07011399, 0.09387136, 0.03432979, 0.02077418, -0.03245179],
         [-0.16244327, -0.01955561, 0.14579704, -0.21845654, 0.03107734,
          -0.06554417, 0.03659142, 0.05807935, 0.00825466, 0.00584175]],
        dtype=tf.float32)

    np.testing.assert_almost_equal(outputs.numpy(), expected_outputs.numpy(),
                                   decimal=self._vars['decimal'])
    np.testing.assert_almost_equal(outputs_act.numpy(),
                                   expected_outputs_act.numpy(),
                                   decimal=self._vars['decimal'])