def train_supervised_mixup(self, epoch):
    performance_estimators = self.create_training_performance_estimators()
    print('\nTraining, epoch: %d' % epoch)
    self.net.train()
    for performance_estimator in performance_estimators:
        performance_estimator.init_performance_metrics()
    unsupervised_loss_acc = 0
    num_batches = 0
    # Sample two loaders over the same training range so each batch can be paired for mixup:
    train_loader_subset_1 = self.problem.train_loader_subset_range(0, self.args.num_training)
    train_loader_subset_2 = self.problem.train_loader_subset_range(0, self.args.num_training)
    data_provider = MultiThreadedCpuGpuDataProvider(
        iterator=zip(train_loader_subset_1, train_loader_subset_2),
        is_cuda=self.use_cuda,
        batch_names=["training_1", "training_2"],
        requires_grad={"training_1": ["input"], "training_2": ["input"]},
        volatile={"training_1": ["metaData"], "training_2": ["metaData"]},
        recode_functions={
            "softmaxGenotype": lambda x: recode_for_label_smoothing(x, self.epsilon),
            "input": self.normalize_inputs
        })
    try:
        for batch_idx, (_, data_dict) in enumerate(data_provider):
            input_s_1 = data_dict["training_1"]["input"]
            target_s_1 = data_dict["training_1"]["softmaxGenotype"]
            input_s_2 = data_dict["training_2"]["input"]
            target_s_2 = data_dict["training_2"]["softmaxGenotype"]
            metadata_1 = data_dict["training_1"]["metaData"]
            metadata_2 = data_dict["training_2"]["metaData"]
            num_batches += 1
            self.train_one_batch(performance_estimators, batch_idx,
                                 input_s_1, input_s_2, target_s_1, target_s_2,
                                 metadata_1, metadata_2)
            if (batch_idx + 1) * self.mini_batch_size > self.max_training_examples:
                break
    finally:
        data_provider.close()
    return performance_estimators
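# Illustrative sketch only (not part of this trainer): the interpolation that the mixup
# variant of train_one_batch is expected to apply to the two batches sampled above.
# The helper name and the alpha parameter are hypothetical; the mixing coefficient
# follows the standard mixup recipe (lambda drawn from Beta(alpha, alpha)).
def mixup_pair_sketch(input_1, input_2, target_1, target_2, alpha=0.4):
    import numpy.random
    lam = float(numpy.random.beta(alpha, alpha))
    mixed_input = lam * input_1 + (1.0 - lam) * input_2
    mixed_target = lam * target_1 + (1.0 - lam) * target_2
    return mixed_input, mixed_target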
def to_do(model_trainer, batch_idx, todo_arguments):
    # Dispatch one training batch to the trainer according to the selected --mode.
    if args.mode == "supervised_direct":
        input_s, target_s, metadata = todo_arguments
        input_s_local = input_s.clone()
        target_s_local = target_s.clone()
        metadata_local = metadata.clone()
        target_s_smoothed = recode_for_label_smoothing(target_s_local, model_trainer.args.epsilon_label_smoothing)
        model_trainer.net.train()
        model_trainer.train_one_batch(model_trainer.training_performance_estimators, batch_idx,
                                      input_s_local, target_s_smoothed, metadata_local)
    elif args.mode == "supervised_mixup":
        input_s_1, target_s_1, metadata_1, input_s_2, target_s_2, metadata_2 = todo_arguments
        input_s_1_local = input_s_1.clone()
        target_s_1_local = target_s_1.clone()
        target_s_1_smoothed = recode_for_label_smoothing(target_s_1_local, model_trainer.args.epsilon_label_smoothing)
        input_s_2_local = input_s_2.clone()
        target_s_2_local = target_s_2.clone()
        target_s_2_smoothed = recode_for_label_smoothing(target_s_2_local, model_trainer.args.epsilon_label_smoothing)
        metadata_1_local = metadata_1.clone()
        metadata_2_local = metadata_2.clone()
        model_trainer.net.train()
        model_trainer.train_one_batch(model_trainer.training_performance_estimators, batch_idx,
                                      input_s_1_local, input_s_2_local,
                                      target_s_1_smoothed, target_s_2_smoothed,
                                      metadata_1_local, metadata_2_local)
    elif args.mode == "semisupervised":
        input_s, target_s, metadata, input_u = todo_arguments
        input_s_local = input_s.clone()
        target_s_local = target_s.clone()
        target_s_smoothed = recode_for_label_smoothing(target_s_local, model_trainer.args.epsilon_label_smoothing)
        input_u_local = input_u.clone()
        metadata_local = metadata.clone()
        model_trainer.net.train()
        model_trainer.train_one_batch(model_trainer.training_performance_estimators, batch_idx,
                                      input_s_local, target_s_smoothed, metadata_local, input_u_local)
def to_do(model_trainer, input_s, target_s, metadata, errors):
    # Evaluation callback; batch_idx is expected to be bound in the enclosing scope
    # where this function is defined.
    input_s_local = input_s.clone()
    target_s_local = target_s.clone()
    metadata_local = metadata.clone()
    target_smoothed = recode_for_label_smoothing(target_s_local, model_trainer.args.epsilon_label_smoothing)
    model_trainer.net.eval()
    model_trainer.test_one_batch(model_trainer.test_performance_estimators, batch_idx,
                                 input_s_local, target_smoothed, metadata=metadata_local, errors=errors)
def train_supervised(self, epoch):
    performance_estimators = PerformanceList()
    performance_estimators += [FloatHelper("supervised_loss")]
    performance_estimators += [AccuracyHelper("train_")]
    if self.use_cuda:
        self.tensor_cache.cuda()
    print('\nTraining, epoch: %d' % epoch)
    for performance_estimator in performance_estimators:
        performance_estimator.init_performance_metrics()
    unsupervised_loss_acc = 0
    num_batches = 0
    train_loader_subset = self.problem.train_loader_subset_range(0, self.args.num_training)
    data_provider = MultiThreadedCpuGpuDataProvider(
        iterator=zip(train_loader_subset),
        is_cuda=self.use_cuda,
        batch_names=["training"],
        requires_grad={"training": ["sbi"]},
        volatile={"training": ["metaData"]},
        recode_functions={
            "softmaxGenotype": lambda x: recode_for_label_smoothing(x, self.epsilon),
        })
    cudnn.benchmark = False
    try:
        for batch_idx, (_, data_dict) in enumerate(data_provider):
            sbi = data_dict["training"]["sbi"]
            target_s = data_dict["training"]["softmaxGenotype"]
            metadata = data_dict["training"]["metaData"]
            self.train_one_batch(performance_estimators, batch_idx, sbi, target_s, metadata)
            if (batch_idx + 1) * self.mini_batch_size > self.max_training_examples:
                break
    finally:
        data_provider.close()
    return performance_estimators
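# Illustrative sketch only (an assumption about recode_for_label_smoothing, not the
# project's implementation): standard label smoothing spreads epsilon of the probability
# mass uniformly across the classes of a one-hot (or softmax-encoded) target.
def recode_for_label_smoothing_sketch(target, epsilon):
    num_classes = target.size(1)
    return target * (1.0 - epsilon) + epsilon / num_classes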
def train_semisup(self, epoch):
    performance_estimators = PerformanceList()
    performance_estimators += [FloatHelper("optimized_loss")]
    performance_estimators += [FloatHelper("supervised_loss")]
    performance_estimators += [FloatHelper("reconstruction_loss")]
    performance_estimators += [AccuracyHelper("train_")]
    print('\nTraining, epoch: %d' % epoch)
    self.net.train()
    for performance_estimator in performance_estimators:
        performance_estimator.init_performance_metrics()
    unsupervised_loss_acc = 0
    num_batches = 0
    train_loader_subset = self.problem.train_loader_subset_range(0, self.args.num_training)
    unlabeled_loader = self.problem.unlabeled_loader()
    data_provider = MultiThreadedCpuGpuDataProvider(
        iterator=zip(train_loader_subset, unlabeled_loader),
        is_cuda=self.use_cuda,
        batch_names=["training", "unlabeled"],
        requires_grad={"training": ["input"], "unlabeled": ["input"]},
        volatile={"training": ["metaData"], "unlabeled": []},
        recode_functions={"softmaxGenotype": lambda x: recode_for_label_smoothing(x, self.epsilon)})
    self.net.autoencoder.train()
    try:
        for batch_idx, (_, data_dict) in enumerate(data_provider):
            input_s = data_dict["training"]["input"]
            metadata = data_dict["training"]["metaData"]
            target_s = data_dict["training"]["softmaxGenotype"]
            input_u = data_dict["unlabeled"]["input"]
            num_batches += 1
            # Need copies of input_u and input_s as reconstruction targets:
            target_u = Variable(input_u.data, requires_grad=False)
            target_output_s = Variable(input_s.data, requires_grad=False)
            # Outputs used to calculate the loss of the supervised model
            # must be computed with the model prior to regularization.
            # Zero gradients:
            self.net.zero_grad()
            self.net.autoencoder.zero_grad()
            self.optimizer_training.zero_grad()
            output_s = self.net(input_s)
            output_u = self.net.autoencoder(input_u)
            input_output_s = self.net.autoencoder(input_s)
            output_s_p = self.get_p(output_s)
            _, target_index = torch.max(target_s, dim=1)
            supervised_loss = self.criterion_classifier(output_s, target_s)
            reconstruction_loss_unsup = self.criterion_autoencoder(output_u, target_u)
            reconstruction_loss_sup = self.criterion_autoencoder(input_output_s, target_output_s)
            reconstruction_loss = self.args.gamma * reconstruction_loss_unsup + reconstruction_loss_sup
            optimized_loss = supervised_loss + reconstruction_loss
            optimized_loss.backward()
            self.optimizer_training.step()
            performance_estimators.set_metric(batch_idx, "supervised_loss", supervised_loss.data[0])
            performance_estimators.set_metric(batch_idx, "reconstruction_loss", reconstruction_loss.data[0])
            performance_estimators.set_metric(batch_idx, "optimized_loss", optimized_loss.data[0])
            performance_estimators.set_metric_with_outputs(batch_idx, "train_accuracy", supervised_loss.data[0],
                                                           output_s_p, targets=target_index)
            progress_bar(batch_idx * self.mini_batch_size,
                         self.max_training_examples,
                         performance_estimators.progress_message(
                             ["supervised_loss", "reconstruction_loss", "train_accuracy"]))
            if (batch_idx + 1) * self.mini_batch_size > self.max_training_examples:
                break
    finally:
        data_provider.close()
    return performance_estimators
def train_one_batch(self, performance_estimators, batch_idx, input_s, target_s, meta_data, input_u):
    self.zero_grad_all_optimizers()
    self.num_classes = len(target_s[0])
    # Train reconstruction phase:
    self.net.encoder.train()
    self.net.decoder.train()
    reconstruction_loss = self.net.get_reconstruction_loss(input_u)
    reconstruction_loss.backward()
    for opt in [self.decoder_opt, self.encoder_reconstruction_opt]:
        opt.step()
    # Train discriminators:
    self.net.encoder.train()
    self.net.discriminator_cat.train()
    self.net.discriminator_prior.train()
    self.zero_grad_all_optimizers()
    genotype_frequencies = self.class_frequencies["softmaxGenotype"]
    category_prior = (genotype_frequencies / torch.sum(genotype_frequencies)).numpy()
    discriminator_loss = self.net.get_discriminator_loss(
        common_trainer=self,
        model_input=input_u,
        category_prior=category_prior,
        recode_labels=lambda x: recode_for_label_smoothing(x, epsilon=self.epsilon))
    discriminator_loss.backward()
    for opt in [self.discriminator_cat_opt, self.discriminator_prior_opt]:
        opt.step()
    self.zero_grad_all_optimizers()
    # Train generator:
    self.net.encoder.train()
    generator_loss = self.net.get_generator_loss(input_u)
    generator_loss.backward()
    for opt in [self.encoder_generator_opt]:
        opt.step()
    self.zero_grad_all_optimizers()
    # Train semi-supervised phase, weighting the loss by density and indel/SNP factors:
    weight = 1
    if self.use_pdf:
        self.net.encoder.train()
        _, latent_code = self.net.encoder(input_s)
        weight *= self.estimate_example_density_weight(latent_code)
    indel_weight = self.args.indel_weight_factor
    snp_weight = 1.0
    weight *= self.estimate_batch_weight(meta_data, indel_weight=indel_weight, snp_weight=snp_weight)
    self.net.encoder.train()
    semisup_loss = self.net.get_semisup_loss(input_s, target_s) * weight
    semisup_loss.backward()
    for opt in [self.encoder_semisup_opt]:
        opt.step()
    self.zero_grad_all_optimizers()
    performance_estimators.set_metric(batch_idx, "reconstruction_loss", reconstruction_loss.item())
    performance_estimators.set_metric(batch_idx, "discriminator_loss", discriminator_loss.item())
    performance_estimators.set_metric(batch_idx, "generator_loss", generator_loss.item())
    performance_estimators.set_metric(batch_idx, "semisup_loss", semisup_loss.item())
    performance_estimators.set_metric(batch_idx, "weight", weight)
    if self.args.latent_code_output is not None:
        _, latent_code = self.net.encoder(input_u)
        # Randomly select n rows from the minibatch and keep track of their latent codes:
        idxs_to_sample = torch.randperm(latent_code.size()[0])[:self.args.latent_code_n_per_minibatch]
        for row_idx in idxs_to_sample:
            latent_code_row = latent_code[row_idx]
            self.gaussian_codes.append(torch.squeeze(draw_from_gaussian(latent_code_row.size()[0], 1)))
            self.latent_codes.append(latent_code_row)
    if not self.args.no_progress:
        progress_bar(batch_idx * self.mini_batch_size,
                     self.max_training_examples,
                     performance_estimators.progress_message(
                         ["reconstruction_loss", "discriminator_loss", "generator_loss", "semisup_loss"]))
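# Illustrative sketch only (an assumption about draw_from_gaussian, not the project's
# implementation): the Gaussian reference codes collected above are presumably standard
# normal samples with the same dimensionality as a latent code row.
def draw_from_gaussian_sketch(z_size, n_samples):
    import torch
    return torch.randn(n_samples, z_size)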
def train_semisup_aae(self, epoch, performance_estimators=None):
    if performance_estimators is None:
        performance_estimators = self.create_training_performance_estimators()
    print('\nTraining, epoch: %d' % epoch)
    for performance_estimator in performance_estimators:
        performance_estimator.init_performance_metrics()
    self.net.train()
    supervised_grad_norm = 1.
    unsupervised_loss_acc = 0
    num_batches = 0
    train_loader_subset = self.problem.train_loader_subset_range(0, self.args.num_training)
    unlabeled_loader = self.problem.unlabeled_loader()
    data_provider = MultiThreadedCpuGpuDataProvider(
        iterator=zip(train_loader_subset, unlabeled_loader),
        device=self.device,
        batch_names=["training", "unlabeled"],
        requires_grad={"training": ["input"], "unlabeled": ["input"]},
        recode_functions={
            "softmaxGenotype": lambda x: recode_for_label_smoothing(x, self.epsilon),
            "input": self.normalize_inputs
        },
        vectors_to_keep=["metaData"]
    )
    self.reset_before_train_epoch()
    try:
        for batch_idx, (_, data_dict) in enumerate(data_provider):
            input_s = data_dict["training"]["input"]
            target_s = data_dict["training"]["softmaxGenotype"]
            input_u = data_dict["unlabeled"]["input"]
            meta_data = data_dict["training"]["metaData"]
            num_batches += 1
            self.train_one_batch(performance_estimators, batch_idx, input_s, target_s, meta_data, input_u)
            if ((batch_idx + 1) * self.mini_batch_size) > self.max_training_examples:
                break
    finally:
        data_provider.close()
    latent_code_device = torch.device("cpu")
    if self.args.latent_code_output is not None:
        # Each dimension in the latent code should be Gaussian distributed, so take a histogram of each column.
        # Plot the histograms later to see how they compare to a Gaussian.
        latent_code_tensor = torch.stack(self.latent_codes).to(latent_code_device)
        latent_code_histograms = [torch.histc(latent_code_tensor[:, col_idx],
                                              bins=self.args.latent_code_bins).data.numpy()
                                  for col_idx in range(latent_code_tensor.size()[1])]
        gaussian_code_tensor = torch.stack(self.gaussian_codes).to(latent_code_device)
        gaussian_code_histograms = [torch.histc(gaussian_code_tensor[:, col_idx],
                                                bins=self.args.latent_code_bins).data.numpy()
                                    for col_idx in range(gaussian_code_tensor.size()[1])]
        torch.save({
            "latent": latent_code_histograms,
            "gaussian": gaussian_code_histograms,
        }, "{}_{}.pt".format(self.args.latent_code_output, epoch))
    return performance_estimators
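# Illustrative usage only (hypothetical helper, not part of the trainer): load the
# histograms saved above and plot each latent dimension against its Gaussian reference.
def plot_latent_code_histograms(path):
    import torch
    import matplotlib.pyplot as plt
    histograms = torch.load(path)
    for dim_idx, (latent, gaussian) in enumerate(zip(histograms["latent"], histograms["gaussian"])):
        plt.figure()
        plt.plot(latent, label="latent code")
        plt.plot(gaussian, label="Gaussian reference")
        plt.title("Latent dimension {}".format(dim_idx))
        plt.legend()
    plt.show()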