def test_adda(self, epoch):
    print('\nTesting, epoch: %d' % epoch)
    performance_estimators = self.create_test_performance_estimators()
    for performance_estimator in performance_estimators:
        performance_estimator.init_performance_metrics()
    self.reset_before_test_epoch()
    validation_loader_subset = self.problem.validation_loader_range(0, self.args.num_validation)
    data_provider = MultiThreadedCpuGpuDataProvider(
        iterator=zip(validation_loader_subset),
        device=self.device,
        batch_names=["validation"],
        requires_grad={"validation": []},
        vectors_to_keep=["input", "softmaxGenotype"])
    try:
        for batch_idx, (_, data_dict) in enumerate(data_provider):
            input_s = data_dict["validation"]["input"]
            target_s = data_dict["validation"]["softmaxGenotype"]
            self.net.eval()
            self.test_one_batch(performance_estimators, batch_idx, input_s, target_s)
            if ((batch_idx + 1) * self.mini_batch_size) > self.max_validation_examples:
                break
    finally:
        data_provider.close()
    self.compute_after_test_epoch()
    return performance_estimators
def train_supervised_mixup(self, epoch):
    performance_estimators = self.create_training_performance_estimators()
    print('\nTraining, epoch: %d' % epoch)
    self.net.train()
    for performance_estimator in performance_estimators:
        performance_estimator.init_performance_metrics()
    unsupervised_loss_acc = 0
    num_batches = 0
    # Draw two independently shuffled subsets of the training set to build mixup pairs:
    train_loader_subset_1 = self.problem.train_loader_subset_range(0, self.args.num_training)
    train_loader_subset_2 = self.problem.train_loader_subset_range(0, self.args.num_training)
    data_provider = MultiThreadedCpuGpuDataProvider(
        iterator=zip(train_loader_subset_1, train_loader_subset_2),
        is_cuda=self.use_cuda,
        batch_names=["training_1", "training_2"],
        requires_grad={"training_1": ["input"], "training_2": ["input"]},
        volatile={"training_1": ["metaData"], "training_2": ["metaData"]},
        recode_functions={
            "softmaxGenotype": lambda x: recode_for_label_smoothing(x, self.epsilon),
            "input": self.normalize_inputs
        })
    try:
        for batch_idx, (_, data_dict) in enumerate(data_provider):
            input_s_1 = data_dict["training_1"]["input"]
            target_s_1 = data_dict["training_1"]["softmaxGenotype"]
            input_s_2 = data_dict["training_2"]["input"]
            target_s_2 = data_dict["training_2"]["softmaxGenotype"]
            metadata_1 = data_dict["training_1"]["metaData"]
            metadata_2 = data_dict["training_2"]["metaData"]
            num_batches += 1
            self.train_one_batch(performance_estimators, batch_idx, input_s_1, input_s_2,
                                 target_s_1, target_s_2, metadata_1, metadata_2)
            if (batch_idx + 1) * self.mini_batch_size > self.max_training_examples:
                break
    finally:
        data_provider.close()
    return performance_estimators
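# Note: the per-batch mixup logic lives in train_one_batch, which is defined elsewhere
# in this class. As a minimal sketch of what a mixup-style combination of the two
# example streams typically looks like (an assumption, not the actual implementation;
# the helper name and the Beta parameter `alpha` are hypothetical):
import numpy as np

def mixup_pair(input_1, input_2, target_1, target_2, alpha=0.4):
    # Draw the mixing coefficient from Beta(alpha, alpha), as in the mixup literature.
    lam = np.random.beta(alpha, alpha)
    mixed_input = lam * input_1 + (1.0 - lam) * input_2
    mixed_target = lam * target_1 + (1.0 - lam) * target_2
    return mixed_input, mixed_target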
def train_adda(self, epoch):
    performance_estimators = self.create_training_performance_estimators()
    self.reset_before_train_epoch()
    print('\nTraining, epoch: %d' % epoch)
    for performance_estimator in performance_estimators:
        performance_estimator.init_performance_metrics()
    unsupervised_loss_acc = 0
    num_batches = 0
    # Draw examples from the entire training set, even though num_training limits
    # the length of an epoch.
    train_loader_subset = self.problem.train_loader_subset_range(0, len(self.problem.train_set()))
    unlabeled_loader_subset = self.problem.unlabeled_loader()
    data_provider = MultiThreadedCpuGpuDataProvider(
        iterator=zip(train_loader_subset, unlabeled_loader_subset),
        is_cuda=self.use_cuda,
        batch_names=["training", "unlabeled"],
        requires_grad={"training": ["input"], "unlabeled": ["input"]},
        volatile={"training": ["metaData"], "unlabeled": ["metaData"]},
    )
    try:
        for batch_idx, (_, data_dict) in enumerate(data_provider):
            input_s_1 = data_dict["training"]["input"]
            input_u_2 = data_dict["unlabeled"]["input"]
            num_batches += 1
            # Allow some epochs of pre-training the critic without training the
            # encoder (e.g., set this to epoch > 2 to delay encoder updates):
            self.do_train_encoder = True
            self.train_one_batch(performance_estimators, batch_idx, input_s_1, input_u_2)
            if (batch_idx + 1) * self.mini_batch_size > self.max_training_examples:
                break
    finally:
        data_provider.close()
    self.training_perfs = performance_estimators
    # Apply learning rate schedule:
    test_metric = performance_estimators.get_metric(self.get_test_metric_name())
    assert test_metric is not None, (
        self.get_test_metric_name() + " must be found among estimated performance metrics")
    if not self.args.constant_learning_rates:
        self.scheduler_train.step(test_metric, epoch)
    return performance_estimators
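# train_one_batch for ADDA is defined elsewhere in this class. A minimal sketch of the
# adversarial update it is expected to perform, assuming a feature encoder and a domain
# critic; every name below is hypothetical and the structure is an assumption:
import torch
import torch.nn.functional as F

def adda_step(encoder, critic, critic_opt, encoder_opt,
              input_labeled, input_unlabeled, train_encoder=True):
    # 1) Train the critic to tell source (labeled) features from target (unlabeled) ones.
    critic_opt.zero_grad()
    features_s = encoder(input_labeled).detach()
    features_u = encoder(input_unlabeled).detach()
    logits = torch.cat([critic(features_s), critic(features_u)], dim=0)
    domains = torch.cat([torch.ones(features_s.size(0), 1),
                         torch.zeros(features_u.size(0), 1)], dim=0).to(logits.device)
    critic_loss = F.binary_cross_entropy_with_logits(logits, domains)
    critic_loss.backward()
    critic_opt.step()
    # 2) Optionally train the encoder to fool the critic (inverted domain labels);
    #    do_train_encoder above gates this step, e.g. to pre-train the critic alone.
    if train_encoder:
        encoder_opt.zero_grad()
        fool_logits = critic(encoder(input_unlabeled))
        fool_loss = F.binary_cross_entropy_with_logits(
            fool_logits, torch.ones(fool_logits.size(0), 1).to(fool_logits.device))
        fool_loss.backward()
        encoder_opt.step()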
def test_semi_sup(self, epoch):
    print('\nTesting, epoch: %d' % epoch)
    performance_estimators = PerformanceList()
    performance_estimators += [LossHelper("test_supervised_loss")]
    performance_estimators += [LossHelper("test_reconstruction_loss")]
    performance_estimators += [AccuracyHelper("test_")]
    self.net.eval()
    for performance_estimator in performance_estimators:
        performance_estimator.init_performance_metrics()
    validation_loader_subset = self.problem.validation_loader_range(0, self.args.num_validation)
    data_provider = MultiThreadedCpuGpuDataProvider(
        iterator=zip(validation_loader_subset),
        is_cuda=self.use_cuda,
        batch_names=["validation"],
        requires_grad={"validation": []},
        volatile={"validation": ["input", "softmaxGenotype"]})
    try:
        for batch_idx, (_, data_dict) in enumerate(data_provider):
            input_s = data_dict["validation"]["input"]
            target_s = data_dict["validation"]["softmaxGenotype"]
            # We need copies of the same tensors to serve as autoencoder input and target:
            input_u = Variable(input_s.data, volatile=True)
            target_u = Variable(input_s.data, volatile=True)
            output_s = self.net(input_s)
            output_u = self.net.autoencoder(input_u)
            output_s_p = self.get_p(output_s)
            _, target_index = torch.max(target_s, dim=1)
            supervised_loss = self.criterion_classifier(output_s, target_s)
            reconstruction_loss = self.criterion_autoencoder(output_u, target_u)
            performance_estimators.set_metric(batch_idx, "test_supervised_loss", supervised_loss.data[0])
            performance_estimators.set_metric(batch_idx, "test_reconstruction_loss", reconstruction_loss.data[0])
            performance_estimators.set_metric_with_outputs(batch_idx, "test_accuracy", supervised_loss.data[0],
                                                           output_s_p, targets=target_index)
            progress_bar(batch_idx * self.mini_batch_size, self.max_validation_examples,
                         performance_estimators.progress_message(
                             ["test_supervised_loss", "test_reconstruction_loss", "test_accuracy"]))
            if ((batch_idx + 1) * self.mini_batch_size) > self.max_validation_examples:
                break
    finally:
        data_provider.close()
    test_metric = performance_estimators.get_metric(self.get_test_metric_name())
    assert test_metric is not None, (
        self.get_test_metric_name() + " must be found among estimated performance metrics")
    if not self.args.constant_learning_rates:
        self.scheduler_train.step(test_metric, epoch)
    return performance_estimators
def test_semisupervised_mixup(self, epoch):
    print('\nTesting, epoch: %d' % epoch)
    errors = None
    performance_estimators = self.create_test_performance_estimators()
    for performance_estimator in performance_estimators:
        performance_estimator.init_performance_metrics()
    validation_loader_subset = self.problem.validation_loader_range(0, self.args.num_validation)
    data_provider = MultiThreadedCpuGpuDataProvider(
        iterator=zip(validation_loader_subset),
        device=self.device,
        batch_names=["validation"],
        requires_grad={"validation": []},
        recode_functions={"input": self.normalize_inputs},
        vectors_to_keep=["softmaxGenotype"])
    if self.best_model is None:
        self.best_model = self.net
    self.reset_before_test_epoch()
    try:
        for batch_idx, (_, data_dict) in enumerate(data_provider):
            input_s = data_dict["validation"]["input"]
            target_s = data_dict["validation"]["softmaxGenotype"]
            self.net.eval()
            self.test_one_batch(performance_estimators, batch_idx, input_s, target_s, errors=None)
            if ((batch_idx + 1) * self.mini_batch_size) > self.max_validation_examples:
                break
    finally:
        data_provider.close()
    print("test errors by class: ", str(errors))
    # Apply learning rate schedule:
    test_metric = performance_estimators.get_metric(self.get_test_metric_name())
    assert test_metric is not None, (
        self.get_test_metric_name() + " must be found among estimated performance metrics")
    if not self.args.constant_learning_rates:
        self.scheduler_train.step(test_metric, epoch)
    self.compute_after_test_epoch()
    return performance_estimators
def test_semisup_aae(self, epoch, performance_estimators=None):
    print('\nTesting, epoch: %d' % epoch)
    if performance_estimators is None:
        performance_estimators = self.create_test_performance_estimators()
    self.net.eval()
    for performance_estimator in performance_estimators:
        performance_estimator.init_performance_metrics()
    validation_loader_subset = self.problem.validation_loader_range(0, self.args.num_validation)
    data_provider = MultiThreadedCpuGpuDataProvider(
        iterator=zip(validation_loader_subset),
        is_cuda=self.use_cuda,
        batch_names=["validation"],
        requires_grad={"validation": []},
        volatile={"validation": ["input", "softmaxGenotype"]},
        recode_functions={"input": self.normalize_inputs})
    self.reset_before_test_epoch()
    errors = None
    try:
        for batch_idx, (_, data_dict) in enumerate(data_provider):
            input_s = data_dict["validation"]["input"]
            target_s = data_dict["validation"]["softmaxGenotype"]
            meta_data = data_dict["validation"]["metaData"]
            self.test_one_batch(performance_estimators, batch_idx, input_s, target_s, meta_data, errors)
            if ((batch_idx + 1) * self.mini_batch_size) > self.max_validation_examples:
                break
    finally:
        data_provider.close()
    # Apply learning rate schedules:
    test_metric = performance_estimators.get_metric(self.get_test_metric_name())
    assert test_metric is not None, (
        self.get_test_metric_name() + " must be found among estimated performance metrics")
    if not self.args.constant_learning_rates:
        for scheduler in self.schedulers:
            scheduler.step(test_metric, epoch)
    self.compute_after_test_epoch()
    return performance_estimators
def predict(self, iterator, output_filename, max_examples=sys.maxsize):
    self.model.eval()
    if self.processing_type == "multithreaded":
        # Enable fake_gpu_on_cpu to debug on CPU
        data_provider = MultiThreadedCpuGpuDataProvider(
            iterator=zip(iterator),
            is_cuda=self.use_cuda,
            batch_names=["unlabeled"],
            volatile={"unlabeled": [self.input_name]},
            recode_functions=self.recode_fn,
            fake_gpu_on_cpu=False)
    elif self.processing_type == "sequential":
        data_provider = DataProvider(
            iterator=zip(iterator),
            is_cuda=self.use_cuda,
            batch_names=["unlabeled"],
            volatile={"unlabeled": [self.input_name]},
            recode_functions=self.recode_fn)
    else:
        raise Exception("Unrecognized processing type {}".format(self.processing_type))
    with VectorWriterBinary(sample_id=0,
                            path_with_basename=output_filename,
                            tensor_names=self.problem.get_output_names(),
                            domain_descriptor=self.domain_descriptor,
                            feature_mapper=self.feature_mapper,
                            samples=self.samples,
                            input_files=self.input_files,
                            problem=self.problem,
                            model=self.model) as writer:
        for batch_idx, (indices_dict, data_dict) in enumerate(data_provider):
            input_u = data_dict["unlabeled"][self.input_name]
            idxs_u = indices_dict["unlabeled"]
            outputs = self.model(input_u)
            writer.append(list(idxs_u), outputs, inverse_logit=True)
            progress_bar(batch_idx * self.mini_batch_size, max_examples)
            if ((batch_idx + 1) * self.mini_batch_size) > max_examples:
                break
        data_provider.close()
    print("Done")
def test_supervised(self, epoch):
    print('\nTesting, epoch: %d' % epoch)
    errors = None
    performance_estimators = self.create_test_performance_estimators()
    for performance_estimator in performance_estimators:
        performance_estimator.init_performance_metrics()
    validation_loader_subset = self.problem.validation_loader_range(0, self.args.num_validation)
    data_provider = MultiThreadedCpuGpuDataProvider(
        iterator=zip(validation_loader_subset),
        is_cuda=self.use_cuda,
        batch_names=["validation"],
        requires_grad={"validation": []},
        volatile={"validation": ["sbi", "softmaxGenotype"]},
    )
    try:
        for batch_idx, (_, data_dict) in enumerate(data_provider):
            sbi = data_dict["validation"]["sbi"]
            target_s = data_dict["validation"]["softmaxGenotype"]
            self.net.eval()
            self.test_one_batch(performance_estimators, batch_idx, sbi, target_s, errors=errors)
            if ((batch_idx + 1) * self.mini_batch_size) > self.max_validation_examples:
                break
    finally:
        data_provider.close()
    print("test errors by class: ", str(errors))
    if self.reweight_by_validation_error:
        self.reweight_by_val_errors(errors)
    # Apply learning rate schedule:
    test_metric = performance_estimators.get_metric(self.get_test_metric_name())
    assert test_metric is not None, (
        self.get_test_metric_name() + " must be found among estimated performance metrics")
    if not self.args.constant_learning_rates:
        self.scheduler_train.step(test_metric, epoch)
    return performance_estimators
def train_supervised(self, epoch):
    performance_estimators = PerformanceList()
    performance_estimators += [FloatHelper("supervised_loss")]
    performance_estimators += [AccuracyHelper("train_")]
    if self.use_cuda:
        self.tensor_cache.cuda()
    print('\nTraining, epoch: %d' % epoch)
    for performance_estimator in performance_estimators:
        performance_estimator.init_performance_metrics()
    unsupervised_loss_acc = 0
    num_batches = 0
    train_loader_subset = self.problem.train_loader_subset_range(0, self.args.num_training)
    data_provider = MultiThreadedCpuGpuDataProvider(
        iterator=zip(train_loader_subset),
        is_cuda=self.use_cuda,
        batch_names=["training"],
        requires_grad={"training": ["sbi"]},
        volatile={"training": ["metaData"]},
        recode_functions={
            "softmaxGenotype": lambda x: recode_for_label_smoothing(x, self.epsilon),
        })
    cudnn.benchmark = False
    try:
        for batch_idx, (_, data_dict) in enumerate(data_provider):
            sbi = data_dict["training"]["sbi"]
            target_s = data_dict["training"]["softmaxGenotype"]
            metadata = data_dict["training"]["metaData"]
            self.train_one_batch(performance_estimators, batch_idx, sbi, target_s, metadata)
            if (batch_idx + 1) * self.mini_batch_size > self.max_training_examples:
                break
    finally:
        data_provider.close()
    return performance_estimators
def train_semisup(self, epoch):
    performance_estimators = PerformanceList()
    performance_estimators += [FloatHelper("optimized_loss")]
    performance_estimators += [FloatHelper("supervised_loss")]
    performance_estimators += [FloatHelper("reconstruction_loss")]
    performance_estimators += [AccuracyHelper("train_")]
    print('\nTraining, epoch: %d' % epoch)
    self.net.train()
    for performance_estimator in performance_estimators:
        performance_estimator.init_performance_metrics()
    unsupervised_loss_acc = 0
    num_batches = 0
    train_loader_subset = self.problem.train_loader_subset_range(0, self.args.num_training)
    unlabeled_loader = self.problem.unlabeled_loader()
    data_provider = MultiThreadedCpuGpuDataProvider(
        iterator=zip(train_loader_subset, unlabeled_loader),
        is_cuda=self.use_cuda,
        batch_names=["training", "unlabeled"],
        requires_grad={"training": ["input"], "unlabeled": ["input"]},
        volatile={"training": ["metaData"], "unlabeled": []},
        recode_functions={"softmaxGenotype": lambda x: recode_for_label_smoothing(x, self.epsilon)})
    self.net.autoencoder.train()
    try:
        for batch_idx, (_, data_dict) in enumerate(data_provider):
            input_s = data_dict["training"]["input"]
            metadata = data_dict["training"]["metaData"]
            target_s = data_dict["training"]["softmaxGenotype"]
            input_u = data_dict["unlabeled"]["input"]
            num_batches += 1
            # We need copies of input_u and input_s to serve as reconstruction targets:
            target_u = Variable(input_u.data, requires_grad=False)
            target_output_s = Variable(input_s.data, requires_grad=False)
            # Outputs used to calculate the loss of the supervised model
            # must be computed with the model prior to regularization.
            # Zero gradients:
            self.net.zero_grad()
            self.net.autoencoder.zero_grad()
            self.optimizer_training.zero_grad()
            output_s = self.net(input_s)
            output_u = self.net.autoencoder(input_u)
            input_output_s = self.net.autoencoder(input_s)
            output_s_p = self.get_p(output_s)
            _, target_index = torch.max(target_s, dim=1)
            supervised_loss = self.criterion_classifier(output_s, target_s)
            reconstruction_loss_unsup = self.criterion_autoencoder(output_u, target_u)
            reconstruction_loss_sup = self.criterion_autoencoder(input_output_s, target_output_s)
            reconstruction_loss = self.args.gamma * reconstruction_loss_unsup + reconstruction_loss_sup
            optimized_loss = supervised_loss + reconstruction_loss
            optimized_loss.backward()
            self.optimizer_training.step()
            performance_estimators.set_metric(batch_idx, "supervised_loss", supervised_loss.data[0])
            performance_estimators.set_metric(batch_idx, "reconstruction_loss", reconstruction_loss.data[0])
            performance_estimators.set_metric(batch_idx, "optimized_loss", optimized_loss.data[0])
            performance_estimators.set_metric_with_outputs(batch_idx, "train_accuracy", supervised_loss.data[0],
                                                           output_s_p, targets=target_index)
            progress_bar(batch_idx * self.mini_batch_size, self.max_training_examples,
                         performance_estimators.progress_message(
                             ["supervised_loss", "reconstruction_loss", "train_accuracy"]))
            if (batch_idx + 1) * self.mini_batch_size > self.max_training_examples:
                break
    finally:
        data_provider.close()
    return performance_estimators
def train_semisup_aae(self, epoch, performance_estimators=None):
    if performance_estimators is None:
        performance_estimators = self.create_training_performance_estimators()
    print('\nTraining, epoch: %d' % epoch)
    for performance_estimator in performance_estimators:
        performance_estimator.init_performance_metrics()
    self.net.train()
    supervised_grad_norm = 1.
    unsupervised_loss_acc = 0
    num_batches = 0
    train_loader_subset = self.problem.train_loader_subset_range(0, self.args.num_training)
    unlabeled_loader = self.problem.unlabeled_loader()
    data_provider = MultiThreadedCpuGpuDataProvider(
        iterator=zip(train_loader_subset, unlabeled_loader),
        device=self.device,
        batch_names=["training", "unlabeled"],
        requires_grad={"training": ["input"], "unlabeled": ["input"]},
        recode_functions={
            "softmaxGenotype": lambda x: recode_for_label_smoothing(x, self.epsilon),
            "input": self.normalize_inputs
        },
        vectors_to_keep=["metaData"]
    )
    self.reset_before_train_epoch()
    try:
        for batch_idx, (_, data_dict) in enumerate(data_provider):
            input_s = data_dict["training"]["input"]
            target_s = data_dict["training"]["softmaxGenotype"]
            input_u = data_dict["unlabeled"]["input"]
            meta_data = data_dict["training"]["metaData"]
            num_batches += 1
            self.train_one_batch(performance_estimators, batch_idx, input_s, target_s, meta_data, input_u)
            if ((batch_idx + 1) * self.mini_batch_size) > self.max_training_examples:
                break
    finally:
        data_provider.close()
    latent_code_device = torch.device("cpu")
    if self.args.latent_code_output is not None:
        # Each dimension in the latent code should be Gaussian distributed, so take a
        # histogram of each column. Plot the histograms later to see how they compare
        # to a Gaussian.
        latent_code_tensor = torch.stack(self.latent_codes).to(latent_code_device)
        latent_code_histograms = [torch.histc(latent_code_tensor[:, col_idx],
                                              bins=self.args.latent_code_bins).data.numpy()
                                  for col_idx in range(latent_code_tensor.size()[1])]
        gaussian_code_tensor = torch.stack(self.gaussian_codes).to(latent_code_device)
        gaussian_code_histograms = [torch.histc(gaussian_code_tensor[:, col_idx],
                                                bins=self.args.latent_code_bins).data.numpy()
                                    for col_idx in range(gaussian_code_tensor.size()[1])]
        torch.save({
            "latent": latent_code_histograms,
            "gaussian": gaussian_code_histograms,
        }, "{}_{}.pt".format(self.args.latent_code_output, epoch))
    return performance_estimators
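# The histograms saved above can be compared against the Gaussian prior offline. A
# hedged sketch of such an inspection (matplotlib usage and the helper name are
# assumptions; the file name pattern follows the torch.save call above):
import torch
import matplotlib.pyplot as plt

def plot_latent_histograms(path):
    # Load the dict saved as "{latent_code_output}_{epoch}.pt" above.
    histograms = torch.load(path)
    for dim, (latent_h, gaussian_h) in enumerate(zip(histograms["latent"],
                                                     histograms["gaussian"])):
        plt.figure()
        plt.plot(latent_h, label="latent code")
        plt.plot(gaussian_h, label="gaussian prior")
        plt.title("Latent dimension {}".format(dim))
        plt.legend()
    plt.show()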
# First, get the sum of all of the example vectors
for index, dataset in enumerate(datasets):
    train_loader_subset = problem.loader_for_dataset(dataset, shuffle=True)
    print("Summing dataset {}/{}".format(index + 1, len(datasets)))
    data_provider = MultiThreadedCpuGpuDataProvider(
        iterator=zip(train_loader_subset),
        is_cuda=False,
        batch_names=["dataset"],
        volatile={"dataset": [args.vector_name]},
        recode_functions={args.vector_name: add_to_sum})
    batch_index = 0
    for example in data_provider:
        batch_index += 1
        if batch_index * args.mini_batch_size > args.n:
            break
    data_provider.close()

# Calculate the mean
print("Calculating the mean")
mean = sum_n / n

# Calculate the sum of squared deviations from the mean (sum_sdm)
for index, dataset in enumerate(datasets):
    train_loader_subset = problem.loader_for_dataset(dataset, shuffle=True)
    print("Calculating sum of squared deviations for dataset {}/{}".format(index + 1, len(datasets)))
    data_provider = MultiThreadedCpuGpuDataProvider(
        iterator=zip(train_loader_subset),
        is_cuda=False,
        batch_names=["dataset"],
        volatile={"dataset": [args.vector_name]},
        recode_functions={args.vector_name: add_to_sum_sdm})
    batch_index = 0
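# The recode functions add_to_sum and add_to_sum_sdm are defined earlier in this script
# and are not shown in this fragment. A minimal sketch of what they could look like
# (an assumption; the accumulator names sum_n, sum_sdm, n and mean mirror those used
# above, here wrapped in a closure instead of module-level globals):
import torch

def make_accumulators():
    state = {"sum_n": None, "sum_sdm": None, "n": 0, "mean": None}

    def add_to_sum(tensor):
        # First pass: accumulate the per-feature sum and the example count.
        batch_sum = tensor.sum(dim=0)
        state["sum_n"] = batch_sum if state["sum_n"] is None else state["sum_n"] + batch_sum
        state["n"] += tensor.size(0)
        return tensor

    def add_to_sum_sdm(tensor):
        # Second pass: accumulate squared deviations from the previously computed mean.
        batch_sdm = ((tensor - state["mean"]) ** 2).sum(dim=0)
        state["sum_sdm"] = batch_sdm if state["sum_sdm"] is None else state["sum_sdm"] + batch_sdm
        return tensor

    return state, add_to_sum, add_to_sum_sdm

# After both passes, the standard deviation would be torch.sqrt(state["sum_sdm"] / state["n"]).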
def train_semisup_aae(self, epoch, performance_estimators=None):
    if performance_estimators is None:
        performance_estimators = PerformanceList()
        performance_estimators += [FloatHelper("reconstruction_loss")]
        performance_estimators += [FloatHelper("discriminator_loss")]
        performance_estimators += [FloatHelper("generator_loss")]
        performance_estimators += [FloatHelper("supervised_loss")]
        performance_estimators += [FloatHelper("weight")]
    print('\nTraining, epoch: %d' % epoch)
    for performance_estimator in performance_estimators:
        performance_estimator.init_performance_metrics()
    self.net.train()
    supervised_grad_norm = 1.
    unsupervised_loss_acc = 0
    num_batches = 0
    train_loader_subset1 = self.problem.train_loader_subset_range(0, self.args.num_training)
    train_loader_subset2 = self.problem.train_loader_subset_range(0, self.args.num_training)
    data_provider = MultiThreadedCpuGpuDataProvider(
        iterator=zip(train_loader_subset1, train_loader_subset2),
        is_cuda=self.use_cuda,
        batch_names=["training1", "training2"],
        requires_grad={"training1": ["input"], "training2": ["input"]},
        volatile={"training1": ["metaData"], "training2": ["metaData"]},
        recode_functions={
            # Pass epsilon explicitly, matching the other call sites of this helper:
            "softmaxGenotype": lambda x: recode_for_label_smoothing(x, self.epsilon),
            "input": self.normalize_inputs
        })
    indel_weight = self.args.indel_weight_factor
    snp_weight = 1.0
    latent_codes = []
    try:
        for batch_idx, (_, data_dict) in enumerate(data_provider):
            input_s1 = data_dict["training1"]["input"]
            input_s2 = data_dict["training2"]["input"]
            target_s1 = data_dict["training1"]["softmaxGenotype"]
            target_s2 = data_dict["training2"]["softmaxGenotype"]
            meta_data1 = data_dict["training1"]["metaData"]
            meta_data2 = data_dict["training2"]["metaData"]
            num_batches += 1
            self.zero_grad_all_optimizers()

            # Reconstruction phase: train the decoder and encoder to cross-reconstruct.
            self.net.decoder.train()
            reconstruction_loss = self.net.get_crossconstruction_loss(input_s1, input_s2, target_s2)
            reconstruction_loss.backward()
            for opt in [self.decoder_opt, self.encoder_reconstruction_opt]:
                opt.step()

            # Regularization phase: train the discriminators.
            self.net.encoder.train()
            self.net.discriminator_cat.train()
            self.net.discriminator_prior.train()
            self.zero_grad_all_optimizers()
            genotype_frequencies = self.class_frequencies["softmaxGenotype"]
            category_prior = (genotype_frequencies / torch.sum(genotype_frequencies)).numpy()
            discriminator_loss = self.net.get_discriminator_loss(input_s1, category_prior=category_prior)
            discriminator_loss.backward()
            for opt in [self.discriminator_cat_opt, self.discriminator_prior_opt]:
                opt.step()
            self.zero_grad_all_optimizers()

            # Train the generator (encoder) to fool the discriminators:
            self.net.encoder.train()
            generator_loss = self.net.get_generator_loss(input_s1)
            generator_loss.backward()
            for opt in [self.encoder_generator_opt]:
                opt.step()
            self.zero_grad_all_optimizers()

            if self.use_pdf:
                self.net.encoder.train()
                _, latent_code = self.net.encoder(input_s1)
                weight = self.estimate_example_density_weight(latent_code)
            else:
                weight = self.estimate_batch_weight(meta_data1, indel_weight=indel_weight,
                                                    snp_weight=snp_weight)

            # Semi-supervised phase: train the encoder on the labeled examples:
            self.net.encoder.train()
            supervised_loss = self.net.get_crossencoder_supervised_loss(input_s1, target_s1) * weight
            supervised_loss.backward()
            for opt in [self.encoder_semisup_opt]:
                opt.step()
            self.zero_grad_all_optimizers()

            performance_estimators.set_metric(batch_idx, "reconstruction_loss", reconstruction_loss.data[0])
            performance_estimators.set_metric(batch_idx, "discriminator_loss", discriminator_loss.data[0])
            performance_estimators.set_metric(batch_idx, "generator_loss", generator_loss.data[0])
            performance_estimators.set_metric(batch_idx, "supervised_loss", supervised_loss.data[0])
            performance_estimators.set_metric(batch_idx, "weight", weight)
            if not self.args.no_progress:
                progress_bar(batch_idx * self.mini_batch_size, self.max_training_examples,
                             performance_estimators.progress_message(
                                 ["reconstruction_loss", "discriminator_loss",
                                  "generator_loss", "supervised_loss"]))
            if ((batch_idx + 1) * self.mini_batch_size) > self.max_training_examples:
                break
    finally:
        data_provider.close()
    return performance_estimators
def test_semisup_aae(self, epoch, performance_estimators=None):
    print('\nTesting, epoch: %d' % epoch)
    if performance_estimators is None:
        performance_estimators = PerformanceList()
        performance_estimators += [FloatHelper("reconstruction_loss")]
        performance_estimators += [LossHelper("test_loss")]
        performance_estimators += [AccuracyHelper("test_")]
        performance_estimators += [FloatHelper("weight")]
    self.net.eval()
    for performance_estimator in performance_estimators:
        performance_estimator.init_performance_metrics()
    validation_loader_subset = self.problem.validation_loader_range(0, self.args.num_validation)
    data_provider = MultiThreadedCpuGpuDataProvider(
        iterator=zip(validation_loader_subset),
        is_cuda=self.use_cuda,
        batch_names=["validation"],
        requires_grad={"validation": []},
        volatile={"validation": ["input", "softmaxGenotype"]},
        recode_functions={"input": self.normalize_inputs})
    try:
        for batch_idx, (_, data_dict) in enumerate(data_provider):
            input_s = data_dict["validation"]["input"]
            target_s = data_dict["validation"]["softmaxGenotype"]
            # Estimate the reconstruction loss on validation examples:
            reconstruction_loss = self.net.get_crossconstruction_loss(input_s, input_s, target_s)
            # Now evaluate prediction of categories:
            categories_predicted, latent_code = self.net.encoder(input_s)
            # categories_predicted += self.net.latent_to_categories(latent_code)
            categories_predicted_p = self.get_p(categories_predicted)
            # Replace any NaN probabilities with zero:
            categories_predicted_p[categories_predicted_p != categories_predicted_p] = 0.0
            _, target_index = torch.max(target_s, dim=1)
            categories_loss = self.net.semisup_loss_criterion(categories_predicted, target_s)
            weight = self.estimate_example_density_weight(latent_code)
            performance_estimators.set_metric(batch_idx, "reconstruction_loss", reconstruction_loss.data[0])
            performance_estimators.set_metric(batch_idx, "weight", weight)
            performance_estimators.set_metric_with_outputs(batch_idx, "test_accuracy",
                                                           reconstruction_loss.data[0],
                                                           categories_predicted_p, target_index)
            performance_estimators.set_metric_with_outputs(batch_idx, "test_loss",
                                                           categories_loss.data[0] * weight,
                                                           categories_predicted_p, target_s)
            if not self.args.no_progress:
                progress_bar(batch_idx * self.mini_batch_size, self.max_validation_examples,
                             performance_estimators.progress_message(
                                 ["test_loss", "test_accuracy", "reconstruction_loss"]))
            if ((batch_idx + 1) * self.mini_batch_size) > self.max_validation_examples:
                break
    finally:
        data_provider.close()
    # Apply learning rate schedules:
    test_metric = performance_estimators.get_metric(self.get_test_metric_name())
    assert test_metric is not None, (
        self.get_test_metric_name() + " must be found among estimated performance metrics")
    if not self.args.constant_learning_rates:
        for scheduler in self.schedulers:
            scheduler.step(test_metric, epoch)
    # Run the garbage collector to try to release memory we no longer need:
    import gc
    gc.collect()
    return performance_estimators