def test_SplitOptions(self):
    """Exercise the train/test sizing options of ``Split`` on an artificial feature space.

    Checks, in order:
      * the default split of a 1000-sample, 10-class space (750/250 rows),
      * ``test_size=0`` returning a single feature space rather than a 2-tuple,
      * ``ValueError`` for nonsensical ``train_size`` / ``test_size`` values,
      * ``ValueError`` when an integer size asks for more than a class holds,
      * ``ValueError`` when only some classes can satisfy the requested sizes,
      * equal per-class sample counts when splitting an unbalanced space.

    NOTE(review): a later method in this file reuses the name
    ``test_SplitOptions``. If both live in the same class, the later
    definition replaces this one and the balanced-classes checks below never
    run -- confirm and rename one of them.
    """
    from math import floor
    from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Discrete

    fs_discrete = CreateArtificialFeatureSpace_Discrete(
        n_samples=1000, n_classes=10, num_features_per_signal_type=30,
        noise_gradient=5, initial_noise_sigma=10, n_samples_per_group=1,
        interpolatable=True, random_state=42)

    # default
    train_set, test_set = fs_discrete.Split(random_state=42, quiet=True)
    self.assertEqual(train_set.shape, (750, 600))
    self.assertEqual(test_set.shape, (250, 600))

    # Supposed to only return single FeatureSpace instead of 2-tuple of FeatureSpace
    # when setting test_size = 0
    train_size_per_class = 50
    retval = fs_discrete.Split(
        train_size=train_size_per_class, test_size=0, random_state=42, quiet=True)
    self.assertIsInstance(retval, FeatureSpace)
    self.assertEqual(
        retval.num_samples, train_size_per_class * fs_discrete.num_classes)

    # dummyproofing
    self.assertRaises(ValueError, fs_discrete.Split, train_size='trash')
    self.assertRaises(ValueError, fs_discrete.Split, train_size=1.1)
    self.assertRaises(ValueError, fs_discrete.Split, test_size='trash')
    self.assertRaises(ValueError, fs_discrete.Split, test_size=1.1)

    # What if the feature set number of groups within a class are less than called for
    # when specifying by integer?
    self.assertRaises(ValueError, test_set.Split, test_size=25)

    # What happens when input fs has unbalanced classes, some of which have enough
    # to satisfy train_size/test_size params, and some don't
    # list(...) keeps the concatenation valid where range() is not a list (Python 3).
    remove_these = list(range(250, 300)) + list(range(700, 750))
    fs_class_2_and_7_smaller = fs_discrete.SampleReduce(
        leave_out_sample_group_ids=remove_these)

    self.assertRaises(
        ValueError, fs_class_2_and_7_smaller.Split, train_size=80, test_size=20)

    # Test balanced_classes:
    # Seeded and quiet like the other direct Split() calls in this test, so a
    # failure here is reproducible.
    train_fs, test_fs = fs_class_2_and_7_smaller.Split(random_state=42, quiet=True)

    # Every class is expected to be cut down to 50 samples, the number the
    # test assumes is left in the two reduced classes.
    # Training set number rounds down (apparently).
    expected_num_samps_per_train_class = int(floor(50 * 0.75))
    expected_num_samps_per_test_class = 50 - expected_num_samps_per_train_class

    err_msg = "Balanced classes {} set split error, class {}, expected {}, got {}"
    per_class_sizes = zip(train_fs.class_sizes, test_fs.class_sizes)
    for class_index, (n_train, n_test) in enumerate(per_class_sizes):
        self.assertEqual(
            n_train, expected_num_samps_per_train_class,
            msg=err_msg.format(
                "TRAIN", class_index, expected_num_samps_per_train_class, n_train))
        self.assertEqual(
            n_test, expected_num_samps_per_test_class,
            msg=err_msg.format(
                "TEST", class_index, expected_num_samps_per_test_class, n_test))
def test_SplitOptions(self):
    """Exercise the train/test sizing options of ``Split`` on an artificial feature space.

    Checks, in order:
      * the default split of a 1000-sample, 10-class space (750/250 rows),
      * ``test_size=0`` returning a single feature space rather than a 2-tuple,
      * ``ValueError`` for nonsensical ``train_size`` / ``test_size`` values,
      * ``ValueError`` when an integer size asks for more than a class holds,
      * ``ValueError`` when only some classes can satisfy the requested sizes.

    NOTE(review): an earlier method in this file is also named
    ``test_SplitOptions``. If both live in the same class, this definition
    replaces the earlier one, which then never runs -- confirm and rename
    one of them.
    """
    from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Discrete

    fs_discrete = CreateArtificialFeatureSpace_Discrete(
        n_samples=1000, n_classes=10, num_features_per_signal_type=30,
        noise_gradient=5, initial_noise_sigma=10, n_samples_per_group=1,
        interpolatable=True, random_state=42)

    # default
    train_set, test_set = fs_discrete.Split(random_state=42, quiet=True)
    self.assertEqual(train_set.shape, (750, 600))
    self.assertEqual(test_set.shape, (250, 600))

    # Supposed to only return single FeatureSpace instead of 2-tuple of FeatureSpace
    # when setting test_size = 0
    train_size_per_class = 50
    retval = fs_discrete.Split(
        train_size=train_size_per_class, test_size=0, random_state=42, quiet=True)
    self.assertIsInstance(retval, FeatureSpace)
    self.assertEqual(
        retval.num_samples, train_size_per_class * fs_discrete.num_classes)

    # dummyproofing
    self.assertRaises(ValueError, fs_discrete.Split, train_size='trash')
    self.assertRaises(ValueError, fs_discrete.Split, train_size=1.1)
    self.assertRaises(ValueError, fs_discrete.Split, test_size='trash')
    self.assertRaises(ValueError, fs_discrete.Split, test_size=1.1)

    # What if the feature set number of groups within a class are less than called for
    # when specifying by integer?
    self.assertRaises(ValueError, test_set.Split, test_size=25)

    # What happens when input fs has unbalanced classes, some of which have enough
    # to satisfy train_size/test_size params, and some don't
    # list(...) keeps the concatenation valid where range() is not a list (Python 3).
    remove_these = list(range(250, 300)) + list(range(700, 750))
    fs_class_2_and_7_smaller = fs_discrete.SampleReduce(
        leave_out_sample_group_ids=remove_these)

    self.assertRaises(
        ValueError, fs_class_2_and_7_smaller.Split, train_size=80, test_size=20)
def test_IfNotInterpolatable(self):
    """You can't graph predicted values if the classes aren't interpolatable.

    Builds a small two-class artificial feature space with
    ``interpolatable=False``, classifies a train/test split of it, and
    expects ``PredictedValuesGraph`` to raise ``ValueError`` when given the
    classification result.
    """
    # Imported locally, as the sibling tests do, so this test does not depend
    # on a module-level import.
    from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Discrete

    small_fs = CreateArtificialFeatureSpace_Discrete(
        n_samples=20, n_classes=2, random_state=42, interpolatable=False)
    train_set, test_set = small_fs.Split(random_state=False, quiet=True)
    train_set.Normalize(quiet=True)

    fw = FisherFeatureWeights.NewFromFeatureSpace(train_set).Threshold()
    reduced_train_set = train_set.FeatureReduce(fw)
    reduced_test_set = test_set.FeatureReduce(fw)
    # NOTE(review): test_set is normalized only after reduced_test_set was
    # derived from it, so the reduced set passed to the classifier may be
    # un-normalized. It does not affect the ValueError expected below, but
    # confirm whether reduced_test_set should be normalized instead.
    test_set.Normalize(train_set, quiet=True)

    batch_result = FeatureSpaceClassification.NewWND5(
        reduced_train_set, reduced_test_set, fw, quiet=True)

    # Construction alone must fail; the instance is deliberately not kept.
    with self.assertRaises(ValueError):
        PredictedValuesGraph(batch_result)
def test_TiledTrainTestSplit(self):
    """Uses a fake FeatureSpace

    Creates a 10-class artificial feature space with 4 samples per group,
    splits it into train and test sets, classifies the test set against the
    train set, and asserts that each class's similarity to itself in the
    result's similarity matrix equals exactly 1.0.
    """
    from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Discrete

    fs_kwargs = {
        'name': "DiscreteArtificialFS 10-class",
        'n_samples': 1000,
        'n_classes': 10,  # 100 samples per class
        'num_features_per_signal_type': 25,
        'initial_noise_sigma': 40,
        'noise_gradient': 20,
        'n_samples_per_group': 4,  # 25 images, 2x2 tiling scheme
        'interpolatable': True,
        'random_state': 43,
        'singularity': False,
        'clip': False,
    }
    fs = CreateArtificialFeatureSpace_Discrete(**fs_kwargs)

    train, test = fs.Split(random_state=False, quiet=True)
    train.Normalize(inplace=True, quiet=True)
    fw = FisherFeatureWeights.NewFromFeatureSpace(train).Threshold()

    train.FeatureReduce(fw, inplace=True)
    test.FeatureReduce(fw, inplace=True, quiet=True).Normalize(
        train, inplace=True, quiet=True)

    result = FeatureSpaceClassification.NewWND5(train, test, fw)
    result.Print()

    for class_name in result.test_set.class_names:
        self_similarity = result.similarity_matrix[class_name][class_name]
        # The failure message names the offending class, replacing the old
        # print-and-reraise under a bare except.
        self.assertEqual(
            self_similarity, float(1),
            msg="offending class: {0}, val: {1}".format(
                class_name, self_similarity))