Example #1
    def test_somatic_classifer(self, epoch, performance_estimators=None):
        print('\nTesting, epoch: %d' % epoch)
        if performance_estimators is None:
            performance_estimators = PerformanceList()
            performance_estimators += [LossHelper("test_loss")]
            performance_estimators += [LossHelper("classification_loss")]
            performance_estimators += [LossHelper("frequency_loss")]

        self.net.eval()
        for performance_estimator in performance_estimators:
            performance_estimator.init_performance_metrics()
        cross_entropy_loss = CrossEntropyLoss()
        mse_loss = MSELoss()
        for batch_idx, (_, data_dict) in enumerate(
                self.problem.validation_loader_range(
                    0, self.args.num_validation)):
            inputs = data_dict["input"]
            is_mutated_base_target = data_dict["isBaseMutated"]
            # transform one-hot encoding into a class index:
            _, indices = is_mutated_base_target.max(dim=1)
            is_mutated_base_target = indices
            somatic_frequency_target = data_dict["somaticFrequency"]
            if self.use_cuda:
                inputs, is_mutated_base_target, somatic_frequency_target = inputs.cuda(), \
                                                                           is_mutated_base_target.cuda(), \
                                                                           somatic_frequency_target.cuda()

            # volatile=True disables graph construction during evaluation (pre-0.4 PyTorch):
            inputs = Variable(inputs, volatile=True)
            mut_targets = Variable(is_mutated_base_target, volatile=True)
            freq_targets = Variable(somatic_frequency_target, volatile=True)

            is_base_mutated, output_frequency = self.net(inputs)
            classification_loss = cross_entropy_loss(is_base_mutated,
                                                     mut_targets)
            frequency_loss = mse_loss(output_frequency, freq_targets)
            test_loss = classification_loss + frequency_loss

            performance_estimators.set_metric(batch_idx, "test_loss",
                                              test_loss.data[0])
            performance_estimators.set_metric(batch_idx, "classification_loss",
                                              classification_loss.data[0])
            performance_estimators.set_metric(batch_idx, "frequency_loss",
                                              frequency_loss.data[0])

            progress_bar(
                batch_idx * self.mini_batch_size, self.max_validation_examples,
                performance_estimators.progress_message(["test_loss"]))

            if ((batch_idx + 1) *
                    self.mini_batch_size) > self.max_validation_examples:
                break

        # Apply learning rate schedule:
        test_loss = performance_estimators.get_metric("test_loss")
        assert test_loss is not None, "test_loss must be found among estimated performance metrics"
        if not self.args.constant_learning_rates:
            self.scheduler_train.step(test_loss, epoch)
        return performance_estimators
Example #2
    def test_semi_sup(self, epoch):
        print('\nTesting, epoch: %d' % epoch)

        performance_estimators = PerformanceList()
        performance_estimators += [LossHelper("test_supervised_loss")]
        performance_estimators += [LossHelper("test_reconstruction_loss")]
        performance_estimators += [AccuracyHelper("test_")]

        self.net.eval()
        for performance_estimator in performance_estimators:
            performance_estimator.init_performance_metrics()
        validation_loader_subset = self.problem.validation_loader_range(0, self.args.num_validation)
        data_provider = MultiThreadedCpuGpuDataProvider(iterator=zip(validation_loader_subset), is_cuda=self.use_cuda,
                                                        batch_names=["validation"],
                                                        requires_grad={"validation": []},
                                                        volatile={"validation": ["input", "softmaxGenotype"]})
        try:
            for batch_idx, (_, data_dict) in enumerate(data_provider):
                input_s = data_dict["validation"]["input"]
                target_s = data_dict["validation"]["softmaxGenotype"]
                # the unsupervised input and its reconstruction target are copies of the same tensor:
                input_u, target_u = Variable(input_s.data, volatile=True), Variable(input_s.data, volatile=True)

                output_s = self.net(input_s)
                output_u = self.net.autoencoder(input_u)
                output_s_p = self.get_p(output_s)

                _, target_index = torch.max(target_s, dim=1)

                supervised_loss = self.criterion_classifier(output_s, target_s)
                reconstruction_loss = self.criterion_autoencoder(output_u, target_u)

                performance_estimators.set_metric(batch_idx, "test_supervised_loss", supervised_loss.data[0])
                performance_estimators.set_metric(batch_idx, "test_reconstruction_loss", reconstruction_loss.data[0])
                performance_estimators.set_metric_with_outputs(batch_idx, "test_accuracy", supervised_loss.data[0],
                                                               output_s_p, targets=target_index)

                progress_bar(batch_idx * self.mini_batch_size, self.max_validation_examples,
                             performance_estimators.progress_message(["test_supervised_loss", "test_reconstruction_loss",
                                                                      "test_accuracy"]))

                if ((batch_idx + 1) * self.mini_batch_size) > self.max_validation_examples:
                    break
        finally:
            data_provider.close()
        test_metric = performance_estimators.get_metric(self.get_test_metric_name())
        assert test_metric is not None, self.get_test_metric_name() + " must be found among estimated performance metrics"
        if not self.args.constant_learning_rates:
            self.scheduler_train.step(test_metric, epoch)
        return performance_estimators
Example #3
    def test_autoencoder(self, epoch, performance_estimators=None):
        print('\nTesting, epoch: %d' % epoch)
        if performance_estimators is None:
            performance_estimators = PerformanceList()
            performance_estimators += [LossHelper("test_loss")]

        self.net.eval()
        for performance_estimator in performance_estimators:
            performance_estimator.init_performance_metrics()

        for batch_idx, (_, data_dict) in enumerate(
                self.problem.validation_loader_range(
                    0, self.args.num_validation)):
            inputs = data_dict["input"]
            if self.use_cuda:
                inputs = inputs.cuda()

            # For an autoencoder, the target is a copy of the input itself:
            inputs = Variable(inputs, volatile=True)
            targets = Variable(inputs.data, volatile=True)

            outputs = self.net(inputs)
            loss = self.criterion(outputs, targets)

            performance_estimators.set_metric_with_outputs(
                batch_idx, "test_loss", loss.data[0], outputs, targets)

            progress_bar(
                batch_idx * self.mini_batch_size, self.max_validation_examples,
                performance_estimators.progress_message(["test_loss"]))

            if ((batch_idx + 1) *
                    self.mini_batch_size) > self.max_validation_examples:
                break

        # Apply learning rate schedule:
        test_loss = performance_estimators.get_metric("test_loss")
        assert test_loss is not None, "test_loss must be found among estimated performance metrics"
        if not self.args.constant_learning_rates:
            self.scheduler_train.step(test_loss, epoch)
        return performance_estimators
Example #4
    def test_semisup_aae(self, epoch, performance_estimators=None):
        print('\nTesting, epoch: %d' % epoch)
        if performance_estimators is None:
            performance_estimators = PerformanceList()
            performance_estimators += [FloatHelper("reconstruction_loss")]
            performance_estimators += [LossHelper("test_loss")]
            performance_estimators += [AccuracyHelper("test_")]
            performance_estimators += [FloatHelper("weight")]

        self.net.eval()
        for performance_estimator in performance_estimators:
            performance_estimator.init_performance_metrics()
        validation_loader_subset = self.problem.validation_loader_range(
            0, self.args.num_validation)
        data_provider = MultiThreadedCpuGpuDataProvider(
            iterator=zip(validation_loader_subset),
            is_cuda=self.use_cuda,
            batch_names=["validation"],
            requires_grad={"validation": []},
            volatile={
                "validation": ["input", "softmaxGenotype"],
            },
            recode_functions={"input": self.normalize_inputs})
        try:
            for batch_idx, (_, data_dict) in enumerate(data_provider):
                input_s = data_dict["validation"]["input"]
                target_s = data_dict["validation"]["softmaxGenotype"]

                # Estimate the reconstruction loss on validation examples:
                reconstruction_loss = self.net.get_crossconstruction_loss(
                    input_s, input_s, target_s)

                # now evaluate prediction of categories:
                categories_predicted, latent_code = self.net.encoder(input_s)
                # categories_predicted += self.net.latent_to_categories(latent_code)

                categories_predicted_p = self.get_p(categories_predicted)
                # Replace NaN probabilities with zero (NaN != NaN selects exactly the NaNs):
                categories_predicted_p[
                    categories_predicted_p != categories_predicted_p] = 0.0
                _, target_index = torch.max(target_s, dim=1)
                categories_loss = self.net.semisup_loss_criterion(
                    categories_predicted, target_s)

                weight = self.estimate_example_density_weight(latent_code)
                performance_estimators.set_metric(batch_idx,
                                                  "reconstruction_loss",
                                                  reconstruction_loss.data[0])
                performance_estimators.set_metric(batch_idx, "weight", weight)
                performance_estimators.set_metric_with_outputs(
                    batch_idx, "test_accuracy", reconstruction_loss.data[0],
                    categories_predicted_p, target_index)
                performance_estimators.set_metric_with_outputs(
                    batch_idx, "test_loss", categories_loss.data[0] * weight,
                    categories_predicted_p, target_s)

                if not self.args.no_progress:
                    progress_bar(
                        batch_idx * self.mini_batch_size,
                        self.max_validation_examples,
                        performance_estimators.progress_message([
                            "test_loss", "test_accuracy", "reconstruction_loss"
                        ]))

                if ((batch_idx + 1) *
                        self.mini_batch_size) > self.max_validation_examples:
                    break
        finally:
            data_provider.close()
        # Apply learning rate schedules:
        test_metric = performance_estimators.get_metric(
            self.get_test_metric_name())
        assert test_metric is not None, (
            self.get_test_metric_name() +
            " must be found among estimated performance metrics")
        if not self.args.constant_learning_rates:
            for scheduler in self.schedulers:
                scheduler.step(test_metric, epoch)
        # Run the garbage collector to try to release memory we no longer need:
        import gc
        gc.collect()
        return performance_estimators