示例#1
0
    def test_SplitOptions( self ):
        """Exercise the Split() options of an artificial discrete feature space.

        Checks, in order: the shapes produced by a default split, that
        test_size=0 yields a single FeatureSpace rather than a 2-tuple,
        that invalid train_size/test_size values raise ValueError, that
        asking for more samples than are available raises ValueError, and
        the per-class train/test sizes obtained when splitting a feature
        space whose classes are unbalanced.
        """
        from math import floor
        from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Discrete

        fs_discrete = CreateArtificialFeatureSpace_Discrete( n_samples=1000, n_classes=10,
                num_features_per_signal_type=30, noise_gradient=5, initial_noise_sigma=10,
                n_samples_per_group=1, interpolatable=True, random_state=42)

        # default
        train_set, test_set = fs_discrete.Split( random_state=42, quiet=True )
        self.assertEqual( train_set.shape, (750, 600) )
        self.assertEqual( test_set.shape, (250, 600) )

        # Supposed to only return single FeatureSpace instead of 2-tuple of FeatureSpace
        # when setting test_size = 0
        train_size_per_class = 50
        retval = fs_discrete.Split( train_size=train_size_per_class, test_size=0,
                random_state=42, quiet=True )
        self.assertIsInstance( retval, FeatureSpace )
        self.assertEqual( retval.num_samples,
                train_size_per_class * fs_discrete.num_classes )

        # dummyproofing

        self.assertRaises( ValueError, fs_discrete.Split, train_size='trash' )
        self.assertRaises( ValueError, fs_discrete.Split, train_size=1.1 )
        self.assertRaises( ValueError, fs_discrete.Split, test_size='trash' )
        self.assertRaises( ValueError, fs_discrete.Split, test_size=1.1 )

        # What if the feature set number of groups within a class are less than called for
        # when specifying by integer?
        self.assertRaises( ValueError, test_set.Split, test_size=25 )

        # What happens when input fs has unbalanced classes, some of which have enough
        # to satisfy train_size/test_size params, and some don't
        # list() is required so the concatenation also works where range() is lazy
        # (Python 3); on Python 2 it is a harmless copy.
        # Presumably group ids 250-299 and 700-749 fall in classes 2 and 7, leaving
        # 50 samples in each of them -- TODO confirm against SampleReduce.
        remove_these = list( range(250,300) ) + list( range(700,750) )
        fs_class_2_and_7_smaller = \
              fs_discrete.SampleReduce( leave_out_sample_group_ids=remove_these )

        self.assertRaises( ValueError, fs_class_2_and_7_smaller.Split, train_size=80,
                           test_size=20 )

        # Test balanced_classes:
        train_fs, test_fs = fs_class_2_and_7_smaller.Split()
        # Training set number rounds down (apparently).
        # 50 is presumably the size of the smallest class and 0.75 the default train
        # fraction (it matches the 750/250 default split asserted above).
        expected_num_samps_per_train_class = int( floor(50*0.75) )
        expected_num_samps_per_test_class = 50 - expected_num_samps_per_train_class

        err_msg = "Balanced classes {} set split error, class {}, expected {}, got {}"
        class_size_pairs = zip( train_fs.class_sizes, test_fs.class_sizes )
        for class_index, (n_train, n_test) in enumerate( class_size_pairs ):
            self.assertEqual( n_train, expected_num_samps_per_train_class, msg=\
                    err_msg.format( "TRAIN", class_index,
                                    expected_num_samps_per_train_class, n_train  ) )
            self.assertEqual( n_test, expected_num_samps_per_test_class, msg=\
                    err_msg.format( "TEST", class_index,
                                    expected_num_samps_per_test_class, n_test ) )
示例#2
0
    def test_SplitOptions(self):
        """Exercise the Split() options of an artificial discrete feature space.

        Checks the shapes produced by a default split, that test_size=0
        yields a single FeatureSpace rather than a 2-tuple, that invalid
        train_size/test_size values raise ValueError, and that requests
        exceeding the available samples raise ValueError.
        """
        from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Discrete

        fs_discrete = CreateArtificialFeatureSpace_Discrete(
            n_samples=1000,
            n_classes=10,
            num_features_per_signal_type=30,
            noise_gradient=5,
            initial_noise_sigma=10,
            n_samples_per_group=1,
            interpolatable=True,
            random_state=42)

        # default
        train_set, test_set = fs_discrete.Split(random_state=42, quiet=True)
        self.assertEqual(train_set.shape, (750, 600))
        self.assertEqual(test_set.shape, (250, 600))

        # Supposed to only return single FeatureSpace instead of 2-tuple of FeatureSpace
        # when setting test_size = 0
        train_size_per_class = 50
        retval = fs_discrete.Split(train_size=train_size_per_class,
                                   test_size=0,
                                   random_state=42,
                                   quiet=True)
        self.assertIsInstance(retval, FeatureSpace)
        self.assertEqual(retval.num_samples,
                         train_size_per_class * fs_discrete.num_classes)

        # dummyproofing

        self.assertRaises(ValueError, fs_discrete.Split, train_size='trash')
        self.assertRaises(ValueError, fs_discrete.Split, train_size=1.1)
        self.assertRaises(ValueError, fs_discrete.Split, test_size='trash')
        self.assertRaises(ValueError, fs_discrete.Split, test_size=1.1)

        # What if the feature set number of groups within a class are less than called for
        # when specifying by integer?
        self.assertRaises(ValueError, test_set.Split, test_size=25)

        # What happens when input fs has unbalanced classes, some of which have enough
        # to satisfy train_size/test_size params, and some don't
        # list() is required so the concatenation also works where range() is lazy
        # (Python 3); on Python 2 it is a harmless copy.
        # Presumably group ids 250-299 and 700-749 fall in classes 2 and 7
        # -- TODO confirm against SampleReduce.
        remove_these = list(range(250, 300)) + list(range(700, 750))
        fs_class_2_and_7_smaller = fs_discrete.SampleReduce(
            leave_out_sample_group_ids=remove_these)

        self.assertRaises(ValueError,
                          fs_class_2_and_7_smaller.Split,
                          train_size=80,
                          test_size=20)
示例#3
0
    def test_IfNotInterpolatable( self ):
        """You can't graph predicted values if the classes aren't interpolatable.

        Builds a small two-class artificial feature space with
        interpolatable=False, runs it through split / normalize / Fisher
        feature reduction / WND5 classification, and asserts that constructing
        a PredictedValuesGraph from the result raises ValueError.
        """

        small_fs = CreateArtificialFeatureSpace_Discrete(
                        n_samples=20, n_classes=2, random_state=42, interpolatable=False )
        train_set, test_set = small_fs.Split( random_state=False, quiet=True )
        train_set.Normalize()

        fw = FisherFeatureWeights.NewFromFeatureSpace( train_set ).Threshold()
        reduced_train_set = train_set.FeatureReduce( fw )
        reduced_test_set = test_set.FeatureReduce( fw )
        # NOTE(review): test_set is normalized only after reduced_test_set was
        # derived from it; whether that affects reduced_test_set depends on
        # FeatureReduce/Normalize semantics not visible here -- confirm. The
        # statement order is kept as-is because this test only checks the
        # ValueError below.
        test_set.Normalize( train_set, quiet=True )

        batch_result = FeatureSpaceClassification.NewWND5(
                                    reduced_train_set, reduced_test_set, fw, quiet=True )
        # Only the exception matters, so the constructed object is not bound to a name.
        with self.assertRaises( ValueError ):
            PredictedValuesGraph( batch_result )
    def test_TiledTrainTestSplit(self):
        """Uses a fake FeatureSpace

        Builds a 10-class artificial feature space with 4 samples per group,
        splits it, normalizes and Fisher-reduces both halves, classifies with
        WND5, and asserts that every class's similarity to itself (the
        diagonal of the similarity matrix) equals 1.
        """

        from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Discrete
        fs_kwargs = {
            'name': "DiscreteArtificialFS 10-class",
            'n_samples': 1000,
            'n_classes': 10,  # 100 samples per class
            'num_features_per_signal_type': 25,
            'initial_noise_sigma': 40,
            'noise_gradient': 20,
            'n_samples_per_group': 4,  # 25 images, 2x2 tiling scheme
            'interpolatable': True,
            'random_state': 43,
            'singularity': False,
            'clip': False,
        }

        fs = CreateArtificialFeatureSpace_Discrete(**fs_kwargs)

        train, test = fs.Split(random_state=False, quiet=True)
        train.Normalize(inplace=True, quiet=True)
        fw = FisherFeatureWeights.NewFromFeatureSpace(train).Threshold()

        train.FeatureReduce(fw, inplace=True)
        # Reduce the test set first, then normalize it against the reduced
        # training set.
        test.FeatureReduce(fw, inplace=True,
                           quiet=True).Normalize(train,
                                                 inplace=True,
                                                 quiet=True)

        result = FeatureSpaceClassification.NewWND5(train, test, fw)
        result.Print()

        for class_name in result.test_set.class_names:
            self_similarity = result.similarity_matrix[class_name][class_name]
            # Carry the diagnostic in the assertion message instead of a
            # Python-2-only print statement inside a bare except.
            self.assertEqual(
                self_similarity, float(1),
                msg="offending class: {0}, val: {1}".format(
                    class_name, self_similarity))