Exemplo n.º 1
0
    def test_IfNotInterpolatable(self):
        """You can't graph predicted values if the classes aren't interpolatable.

        Loads ``test-l.fit``, splits it without randomization, reduces both
        halves to the thresholded Fisher features, classifies the reduced test
        set against the reduced training set, and expects ``ValueError`` when
        the rank-ordered predicted values graph is built or saved.

        NOTE(review): presumably ``test-l.fit`` has class labels that cannot
        be interpolated -- confirm against the fixture file.
        """

        testfilename = 'ShouldntBeGraphable.png'
        fitfilepath = wndchrm_test_dir + sep + 'test-l.fit'
        fs = FeatureSet_Discrete.NewFromFitFile(fitfilepath)
        train_set, test_set = fs.Split(randomize=False, quiet=True)
        train_set.Normalize()

        fw = FisherFeatureWeights.NewFromFeatureSet(train_set).Threshold()
        reduced_train_set = train_set.FeatureReduce(fw.names)
        reduced_test_set = test_set.FeatureReduce(fw.names)
        # Normalize the set that is actually classified. The previous code
        # normalized the unreduced ``test_set`` here, which is never used
        # again, leaving ``reduced_test_set`` unnormalized.
        reduced_test_set.Normalize(reduced_train_set, quiet=True)

        batch_result = DiscreteBatchClassificationResult.New(reduced_train_set,
                                                             reduced_test_set,
                                                             fw,
                                                             quiet=True)
        graph = PredictedValuesGraph(batch_result)

        out_path = self.tempdir + sep + testfilename

        # Either call may be the one that rejects the data; the test passes
        # as soon as one of them raises ValueError.
        with self.assertRaises(ValueError):
            graph.RankOrderedPredictedValuesGraph()
            graph.SaveToFile(out_path)
    def test_NewFromFeatureSet(self):
        """Fisher score calculation.

        Computes Fisher weights from the normalized feature set loaded from
        ``self.test_fit_path`` and checks them, value by value and within
        ``self.epsilon``, against the reference weights stored in
        ``self.test_feat_weight_path``.
        """

        feature_set = FeatureSet_Discrete.NewFromFitFile(self.test_fit_path)
        feature_set.Normalize()
        result_weights = FisherFeatureWeights.NewFromFeatureSet(feature_set)

        # test weights generated from test-l.fit:
        # wndchrm classify -l -f1.0 -vtest_fit-l.weights test-l.fit test-l.fit
        target_weights = FisherFeatureWeights.NewFromFile(
            self.test_feat_weight_path)

        # zip() stops at the shorter sequence, so without this check a result
        # with missing (or extra) weights would still pass the loop below.
        self.assertEqual(len(target_weights.values),
                         len(result_weights.values))

        for target_val, res_val in zip(target_weights.values,
                                       result_weights.values):
            self.assertAlmostEqual(target_val, res_val, delta=self.epsilon)
Exemplo n.º 3
0
    def test_fisherFeatureWeights(self):
        """Fisher weights of a four-sample, two-class feature set.

        The first two signatures returned by ``self.createSignatures()`` are
        added as class 0 and the last two as class 1; the resulting weights
        are expected to be ``[4.0, 0.0]`` for features ``ft [0]``/``ft [1]``.
        """
        sig1, sig2, sig3, sig4 = self.createSignatures()
        labelled_signatures = (
            (sig1, 0),
            (sig2, 0),
            (sig3, 1),
            (sig4, 1),
        )

        feature_set = FeatureSet_Discrete()
        for signature, class_index in labelled_signatures:
            feature_set.AddSignature(signature, class_index)
        # Return value is not inspected; the call itself is kept as-is.
        contiguous = feature_set.ContiguousDataMatrix()

        # TODO: weight[1]==0, presumably because the intra-class variance=0,
        # even though feature[1] is a perfect discriminator?
        feature_set.Normalize()

        weights = FisherFeatureWeights.NewFromFeatureSet(feature_set)

        expected_values = [4.0, 0.0]
        expected_names = ['ft [0]', 'ft [1]']
        np.testing.assert_almost_equal(weights.values, expected_values)
        self.assertEqual(weights.names, expected_names)
Exemplo n.º 4
0
class TestGraphs(unittest.TestCase):
    """Test WND-CHARM's graph-making functionality.

    One batch classification result is computed in the class body from an
    artificial discrete feature set and shared by the test methods through
    ``self.batch_result``. Every test gets its own temporary directory, which
    is removed again in ``tearDown``.
    """

    # Shared fixture -- evaluated once, when the class body is executed.
    fs = CreateArtificialFeatureSet_Discrete(n_samples=1000,
                                             n_classes=10,
                                             initial_noise_sigma=100,
                                             noise_gradient=10,
                                             random_state=43)
    train_set, test_set = fs.Split(randomize=False, quiet=True)
    train_set.Normalize(quiet=True)
    fw = FisherFeatureWeights.NewFromFeatureSet(train_set).Threshold()

    reduced_train_set = train_set.FeatureReduce(fw.names)
    reduced_test_set = test_set.FeatureReduce(fw.names)
    reduced_test_set.Normalize(reduced_train_set, quiet=True)

    batch_result = DiscreteBatchClassificationResult.New(reduced_train_set,
                                                         reduced_test_set,
                                                         fw,
                                                         quiet=True)

    def setUp(self):
        """Create a fresh temporary directory for the files a test writes."""
        self.tempdir = mkdtemp()

    def tearDown(self):
        """Remove the temporary directory created in ``setUp``."""
        rmtree(self.tempdir)

    def CompareGraphs(self, graph, testfilename):
        """Helper function to check output graphs.

        Saves ``graph`` to ``<self.tempdir>/<testfilename>`` and compares that
        file with the reference file ``testfilename``, which is opened exactly
        as given (i.e. relative to the current working directory).

        graph -- object providing ``SaveToFile(path)``.
        testfilename -- name of the reference file and of the file written.

        Fails the test, naming both files, if ``filecmp.cmp`` reports a
        difference.
        """

        out_path = self.tempdir + sep + testfilename
        graph.SaveToFile(out_path)

        # The failure message carries both paths, so no separate print is
        # needed to identify the mismatching files.
        self.assertTrue(
            filecmp.cmp(testfilename, out_path),
            "Files not equal: {0} and {1}".format(testfilename, out_path))

    @unittest.skipIf(HasMatplotlib, "Skipped if matplotlib IS installed")
    def test_ErrMsgIfMatplotibNotInstalled(self):
        """Fail gracefully with ImportError if matplotlib is not installed."""

        graph = PredictedValuesGraph(self.batch_result)
        with self.assertRaises(ImportError):
            graph.RankOrderedPredictedValuesGraph()
        with self.assertRaises(ImportError):
            graph.KernelSmoothedDensityGraph()

    @unittest.skipUnless(HasMatplotlib,
                         "Skipped if matplotlib IS NOT installed")
    def test_RankOrderedFromBatchClassificationResult(self):
        """Rank Ordered Predicted values graph from a single split"""

        testfilename = 'test_graph_rank_ordered.png'
        graph = PredictedValuesGraph(self.batch_result)
        graph.RankOrderedPredictedValuesGraph()
        self.CompareGraphs(graph, testfilename)

    @unittest.skipUnless(HasMatplotlib,
                         "Skipped if matplotlib IS NOT installed")
    def test_KernelSmoothedFromBatchClassificationResult(self):
        """Kernel Smoothed Probability density graph from a single split"""

        testfilename = 'test_graph_kernel_smoothed.png'
        graph = PredictedValuesGraph(self.batch_result)
        graph.KernelSmoothedDensityGraph()

        self.CompareGraphs(graph, testfilename)

    @unittest.skip("Skip until ShuffleSplit has a RandomState param to pass in"
                   )
    def test_FromDiscreteClassificationExperimentResults(self):
        """Rank Ordered Predicted values graph from an experiment result (multiple splits)"""

        testfilename = 'test_graph_rank_ordered_experiment.png'

        small_fs = CreateArtificialFeatureSet_Discrete(n_samples=100,
                                                       n_classes=5,
                                                       initial_noise_sigma=100,
                                                       noise_gradient=10,
                                                       random_state=42)
        experiment = DiscreteClassificationExperimentResult.NewShuffleSplit(
            small_fs, quiet=True)
        # Graph the multi-split experiment built above. The previous code
        # graphed self.batch_result and never used ``experiment``.
        graph = PredictedValuesGraph(experiment)
        graph.RankOrderedPredictedValuesGraph()
        self.CompareGraphs(graph, testfilename)

    @unittest.skipUnless(HasMatplotlib,
                         "Skipped if matplotlib IS NOT installed")
    def test_FromHTML(self):
        """Rank Ordered Predicted values graph from a result parsed out of an HTML report"""

        testfilename = 'test_graph_fromHTML.png'
        import zipfile

        # The HTML report is stored zipped. The archive can be regenerated
        # from the plain report with:
        #   zf = zipfile.ZipFile(report_path + '.zip', mode='w')
        #   zf.write(report_path, compress_type=zipfile.ZIP_DEFLATED)
        #   zf.close()
        zipped_file_path = pychrm_test_dir + sep + 'c_elegans_terminal_bulb.html.zip'

        # Inflate the report into the temp dir; the context manager closes
        # the archive even if extraction fails.
        with zipfile.ZipFile(zipped_file_path, mode='r') as zf:
            zf.extractall(self.tempdir)
            html_member = zf.namelist()[0]

        htmlfilepath = self.tempdir + sep + html_member
        exp_result = DiscreteClassificationExperimentResult.NewFromHTMLReport(
            htmlfilepath)
        graph = PredictedValuesGraph(exp_result)
        graph.RankOrderedPredictedValuesGraph()
        self.CompareGraphs(graph, testfilename)

    # NOTE: once the unconditional skip below is lifted, this test should be
    # skipped unless matplotlib is installed, like the other graph tests.
    @unittest.skip(
        "Skip until test training set w/ uninterpolatable class lables is made avail."
    )
    def test_IfNotInterpolatable(self):
        """You can't graph predicted values if the classes aren't interpolatable."""

        testfilename = 'ShouldntBeGraphable.png'
        fitfilepath = wndchrm_test_dir + sep + 'test-l.fit'
        fs = FeatureSet_Discrete.NewFromFitFile(fitfilepath)
        train_set, test_set = fs.Split(randomize=False, quiet=True)
        train_set.Normalize()

        fw = FisherFeatureWeights.NewFromFeatureSet(train_set).Threshold()
        reduced_train_set = train_set.FeatureReduce(fw.names)
        reduced_test_set = test_set.FeatureReduce(fw.names)
        # Normalize the set that is actually classified, as the class-level
        # fixture does. The previous code normalized the unreduced, unused
        # ``test_set`` instead.
        reduced_test_set.Normalize(reduced_train_set, quiet=True)

        batch_result = DiscreteBatchClassificationResult.New(reduced_train_set,
                                                             reduced_test_set,
                                                             fw,
                                                             quiet=True)
        graph = PredictedValuesGraph(batch_result)

        out_path = self.tempdir + sep + testfilename

        # Either call may be the one that rejects the data; the test passes
        # as soon as one of them raises ValueError.
        with self.assertRaises(ValueError):
            graph.RankOrderedPredictedValuesGraph()
            graph.SaveToFile(out_path)