def test_ContinuousFitOnFit(self):
    """Round-trip a discrete feature space through a .fit file, reload it as a
    continuous (regression) feature space, and run a multivariate linear
    regression pipeline on it end-to-end.

    Smoke test: passes if nothing raises."""
    from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Discrete

    fs_discrete = CreateArtificialFeatureSpace_Discrete( n_samples=1000,
        n_classes=10, num_features_per_signal_type=30, noise_gradient=5,
        initial_noise_sigma=10, n_samples_per_group=1, interpolatable=True)

    tempdir = mkdtemp()
    path_to_fit = tempdir + sep + 'Artificial.fit'
    try:
        fs_discrete.ToFitFile(path_to_fit)
        # Reload the same data, but interpret the class labels as continuous
        # ground-truth values (discrete=False)
        fs_continuous = FeatureSpace.NewFromFitFile(path_to_fit, discrete=False)

        fs_continuous.Normalize(quiet=True)
        # Pearson correlation weights are the continuous analog of Fisher weights
        fw_reduced = PearsonFeatureWeights.NewFromFeatureSpace(
            fs_continuous).Threshold()
        fs_reduced = fs_continuous.FeatureReduce(fw_reduced)
        batch_result = FeatureSpaceRegression.NewMultivariateLinear(
            fs_reduced, fw_reduced, quiet=True)
    finally:
        # Always remove the temp dir, even if the pipeline raises
        rmtree(tempdir)
def test_RemoveClass( self ):
    """Remove classes by integer index and by name token, then verify
    class/sample counts, that the feature dimension is untouched, and that
    bad class tokens raise the documented exceptions."""
    from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Discrete

    n_classes = 10

    fs = CreateArtificialFeatureSpace_Discrete( n_samples=1000,
        n_classes=n_classes, num_features_per_signal_type=30, noise_gradient=5,
        initial_noise_sigma=10, n_samples_per_group=1, interpolatable=True )
    orig_num_features = fs.num_features

    #print "BEFORE:", str( fs )
    # Remove by integer index first:
    fs.RemoveClass( 5, inplace=True )
    #print "AFTER:", str( fs )
    self.assertEqual( fs.num_classes, n_classes - 1 )

    # Now, the list of classes are:
    # ['FakeClass-100', 'FakeClass-77.78', 'FakeClass-55.56', 'FakeClass-33.33',
    #  'FakeClass-11.11', 'FakeClass33.33', 'FakeClass55.56', 'FakeClass77.78',
    #  'FakeClass100']
    # "FakeClass33.33" is the 5th class, so essentially we're trying to remove the
    # 5th class twice — this time addressed by its full name token
    # (NOTE(review): actual stored token appears to be "FakeClass+033.3"):
    fs.RemoveClass( "FakeClass+033.3" , inplace=True )
    #print "AFTER AFTER:", str( fs )
    self.assertEqual( fs.num_classes, n_classes - 2 )

    # Each class has 100 samples, and we've removed 2 classes:
    self.assertEqual( fs.num_samples, 800 )
    self.assertEqual( fs.shape[0], 800 )
    # Removing classes must not change the feature dimension:
    self.assertEqual( fs.num_features, orig_num_features )

    # dummyproofing: unknown name -> ValueError; out-of-range index -> IndexError
    self.assertRaises( ValueError, fs.RemoveClass, class_token='trash' )
    self.assertRaises( IndexError, fs.RemoveClass, class_token=10 )
def test_SamplesUnion(self):
    """SamplesUnion (exposed as the '+' operator) joins two compatible
    FeatureSpaces; joining incompatible feature spaces raises ValueError."""
    from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Discrete

    n_classes = 2
    fs1 = CreateArtificialFeatureSpace_Discrete( n_samples=20,
        n_classes=n_classes, num_features_per_signal_type=30, noise_gradient=5,
        initial_noise_sigma=10, n_samples_per_group=1, interpolatable=True)

    # A fit file loaded from disk has a different feature set -> not unionable
    fitfile_path = wndchrm_test_dir + sep + 'test-l.fit'
    fs2 = FeatureSpace.NewFromFitFile(fitfile_path)
    self.assertRaises(ValueError, fs1.SamplesUnion, other_fs=fs2)

    # A second artificial FS built with identical parameters IS unionable
    fs3 = CreateArtificialFeatureSpace_Discrete( n_samples=20,
        n_classes=n_classes, num_features_per_signal_type=30, noise_gradient=5,
        initial_noise_sigma=10, n_samples_per_group=1, interpolatable=True)

    joined_fs = fs1 + fs3
    # Union of same-named classes should not create new classes
    self.assertEqual(n_classes, joined_fs.num_classes)
def test_SplitOptions( self ):
    """Exercise FeatureSpace.Split(): default 75/25 split, test_size=0 single
    return value, argument validation, unbalanced-class error handling, and
    the balanced-classes per-class sample counts."""
    from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Discrete

    fs_discrete = CreateArtificialFeatureSpace_Discrete( n_samples=1000,
        n_classes=10, num_features_per_signal_type=30, noise_gradient=5,
        initial_noise_sigma=10, n_samples_per_group=1, interpolatable=True,
        random_state=42)

    # default: 75% train / 25% test of the 1000x600 feature matrix
    train_set, test_set = fs_discrete.Split( random_state=42, quiet=True )
    self.assertEqual( train_set.shape, (750, 600) )
    self.assertEqual( test_set.shape, (250, 600) )

    # Supposed to only return single FeatureSpace instead of 2-tuple of FeatureSpace
    # when setting test_size = 0
    i = 50
    retval = fs_discrete.Split( train_size=i, test_size=0, random_state=42, quiet=True )
    self.assertEqual( type(retval), FeatureSpace )
    # train_size given as an int means "per class"
    self.assertEqual( retval.num_samples, i * fs_discrete.num_classes )

    # dummyproofing: sizes must be ints or fractions in [0, 1]
    self.assertRaises( ValueError, fs_discrete.Split, train_size='trash' )
    self.assertRaises( ValueError, fs_discrete.Split, train_size=1.1 )
    self.assertRaises( ValueError, fs_discrete.Split, test_size='trash' )
    self.assertRaises( ValueError, fs_discrete.Split, test_size=1.1 )

    # What if the feature set number of groups within a class are less than called for
    # when specifying by integer?
    self.assertRaises( ValueError, test_set.Split, test_size=25 )

    # What happens when input fs has unbalanced classes, some of which have enough
    # to satisfy train_size/test_size params, and some don't
    # (NOTE: Python 2 list concatenation of range() results)
    remove_these = range(250,300) + range(700,750)
    fs_class_2_and_7_smaller = \
        fs_discrete.SampleReduce( leave_out_sample_group_ids=remove_these )
    self.assertRaises( ValueError, fs_class_2_and_7_smaller.Split,
        train_size=80, test_size=20 )

    # Test balanced_classes:
    train_fs, test_fs = fs_class_2_and_7_smaller.Split()

    # Training set number rounds down (apparently).
    from math import floor
    # Smallest classes have 50 samples; default split is 75/25
    expected_num_samps_per_train_class = int( floor(50*0.75) )
    expected_num_samps_per_test_class = 50 - expected_num_samps_per_train_class

    err_msg = "Balanced classes {} set split error, class {}, expected {}, got {}"
    # Every class must be trimmed to the size of the smallest class
    for i, (n_train, n_test) in enumerate( zip( train_fs.class_sizes,
            test_fs.class_sizes )):
        self.assertEqual( n_train, expected_num_samps_per_train_class, msg=\
            err_msg.format( "TRAIN", i, expected_num_samps_per_train_class, n_train ) )
        self.assertEqual( n_test, expected_num_samps_per_test_class, msg=\
            err_msg.format( "TEST", i, expected_num_samps_per_test_class, n_test ) )
def test_SplitOptions(self):
    """Exercise FeatureSpace.Split(): default 75/25 split, test_size=0 single
    return value, argument validation, and unbalanced-class error handling."""
    from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Discrete

    fs_discrete = CreateArtificialFeatureSpace_Discrete( n_samples=1000,
        n_classes=10, num_features_per_signal_type=30, noise_gradient=5,
        initial_noise_sigma=10, n_samples_per_group=1, interpolatable=True,
        random_state=42)

    # default: 75% train / 25% test of the 1000x600 feature matrix
    train_set, test_set = fs_discrete.Split(random_state=42, quiet=True)
    self.assertEqual(train_set.shape, (750, 600))
    self.assertEqual(test_set.shape, (250, 600))

    # Supposed to only return single FeatureSpace instead of 2-tuple of FeatureSpace
    # when setting test_size = 0
    i = 50
    retval = fs_discrete.Split(train_size=i, test_size=0, random_state=42, quiet=True)
    self.assertEqual(type(retval), FeatureSpace)
    # train_size given as an int means "per class"
    self.assertEqual(retval.num_samples, i * fs_discrete.num_classes)

    # dummyproofing: sizes must be ints or fractions in [0, 1]
    self.assertRaises(ValueError, fs_discrete.Split, train_size='trash')
    self.assertRaises(ValueError, fs_discrete.Split, train_size=1.1)
    self.assertRaises(ValueError, fs_discrete.Split, test_size='trash')
    self.assertRaises(ValueError, fs_discrete.Split, test_size=1.1)

    # What if the feature set number of groups within a class are less than called for
    # when specifying by integer?
    self.assertRaises(ValueError, test_set.Split, test_size=25)

    # What happens when input fs has unbalanced classes, some of which have enough
    # to satisfy train_size/test_size params, and some don't
    # (NOTE: Python 2 list concatenation of range() results)
    remove_these = range(250, 300) + range(700, 750)
    fs_class_2_and_7_smaller = \
        fs_discrete.SampleReduce( leave_out_sample_group_ids=remove_these )
    self.assertRaises(ValueError, fs_class_2_and_7_smaller.Split,
        train_size=80, test_size=20)
def test_AccuracyVersusNumFeaturesGraph( self ):
    """Accuracy vs. # features with and without LDA feature space transform"""

    # NOTE(review): testfilename is currently unused here (no CompareGraphs
    # call) — this test passes if graph construction does not raise.
    testfilename = 'test_graph_rank_ordered_experiment.npy'

    # Make a smaller featureset to do multiple splits
    fs_kwargs = {}
    fs_kwargs['name'] = "DiscreteArtificialFS RANK ORDERED SHUFFLE SPLIT"
    fs_kwargs['n_samples'] = 100 # smaller
    fs_kwargs['n_classes'] = 5 # smaller, 20 samples per class
    fs_kwargs['num_features_per_signal_type'] = 10 # smaller
    fs_kwargs['initial_noise_sigma'] = 50
    fs_kwargs['noise_gradient'] = 20
    fs_kwargs['n_samples_per_group'] = 1
    fs_kwargs['interpolatable'] = True
    fs_kwargs['random_state'] = 42
    fs_kwargs['singularity'] = False
    fs_kwargs['clip'] = False

    small_fs = CreateArtificialFeatureSpace_Discrete( **fs_kwargs )

    # Shuffle-split parameters for the graph's internal experiment
    ss_kwargs = {}
    ss_kwargs['quiet'] = False
    ss_kwargs['feature_space'] = small_fs
    ss_kwargs['n_iter'] = n_iter = 10
    ss_kwargs['train_size'] = train_size = 18 # per-class
    ss_kwargs['test_size' ] = test_size = 2 # per-class
    ss_kwargs['random_state'] = 42

    graph = AccuracyVersusNumFeaturesGraph( **ss_kwargs )
def test_FromDiscreteClassificationExperimentResults(self):
    """Rank Ordered Predicted values graph from an experiment result (multiple splits)"""
    testfilename = 'test_graph_rank_ordered_experiment.npy'

    # Make a smaller featureset to do multiple splits
    fs_kwargs = {}
    fs_kwargs['name'] = "DiscreteArtificialFS RANK ORDERED SHUFFLE SPLIT"
    fs_kwargs['n_samples'] = 100 # smaller
    fs_kwargs['n_classes'] = 5 # smaller, 20 samples per class
    fs_kwargs['num_features_per_signal_type'] = 10 # smaller
    fs_kwargs['initial_noise_sigma'] = 50
    fs_kwargs['noise_gradient'] = 20
    fs_kwargs['n_samples_per_group'] = 1
    fs_kwargs['interpolatable'] = True
    fs_kwargs['random_state'] = 42
    fs_kwargs['singularity'] = False
    fs_kwargs['clip'] = False

    small_fs = CreateArtificialFeatureSpace_Discrete(**fs_kwargs)

    # Shuffle-split experiment: 10 iterations, 18 train / 2 test per class
    ss_kwargs = {}
    ss_kwargs['quiet'] = True
    ss_kwargs['n_iter'] = n_iter = 10
    ss_kwargs['train_size'] = train_size = 18 # per-class
    ss_kwargs['test_size'] = test_size = 2 # per-class
    ss_kwargs['random_state'] = 42
    exp = FeatureSpaceClassificationExperiment.NewShuffleSplit(
        small_fs, **ss_kwargs)

    # Per-split (not averaged) results feed the rank-ordered graph
    graph = PredictedValuesGraph(exp, use_averaged_results=False)
    graph.RankOrderedPredictedValuesGraph()
    # Compare plotted coordinates against the blessed reference .npy
    self.CompareGraphs(graph, testfilename)
def test_PerSampleStatisticsWITHPredictedValue(self):
    """DISCRETE PerSampleStatistics with numeric predicted value"""

    # Interpolatable class labels -> classifier can emit numeric predictions
    fs_kwargs = {}
    fs_kwargs['name'] = "DISCRETE PerSampleStatistics WITH Pred Values"
    fs_kwargs['n_samples'] = n_samples = 40
    fs_kwargs['n_classes'] = 2
    fs_kwargs[
        'num_features_per_signal_type'] = 10 # small on purpose, to make test fast
    fs_kwargs['noise_gradient'] = 50
    fs_kwargs['initial_noise_sigma'] = 75
    fs_kwargs['n_samples_per_group'] = 1
    fs_kwargs['random_state'] = 42
    fs_kwargs['interpolatable'] = True
    fs_kwargs['singularity'] = False
    fs_kwargs['clip'] = False

    fs = CreateArtificialFeatureSpace_Discrete(**fs_kwargs)

    # Shuffle-split experiment: 10 iterations, 8 train / 2 test per class
    ss_kwargs = {}
    ss_kwargs[
        'name'] = "Discrete PerSampleStatistics ShuffleSplit WITH Pred Values"
    ss_kwargs['quiet'] = True
    ss_kwargs['n_iter'] = n_iter = 10
    ss_kwargs['train_size'] = train_size = 8 # per-class
    ss_kwargs['test_size'] = test_size = 2 # per-class
    ss_kwargs['random_state'] = 42
    exp = FeatureSpaceClassificationExperiment.NewShuffleSplit(
        fs, **ss_kwargs)

    # Print() internally generates the experiment stats first
    #from os import devnull
    exp.Print() #output_stream=devnull )
    exp.PerSampleStatistics() #output_stream=devnull )
    # Smoke test: reaching this point without raising is the pass condition
    self.assertTrue(True)
def test_NumFeaturesGridSearch(self):
    """Grid search over the number of features, with and without the LDA
    pipeline (smoke test: passes if nothing raises)."""

    fs_kwargs = {}
    fs_kwargs['name'] = "DISCRETE PerSampleStatistics WITH Pred Values"
    fs_kwargs['n_samples'] = n_samples = 250
    fs_kwargs['n_classes'] = 10
    fs_kwargs[
        'num_features_per_signal_type'] = 10 # small on purpose, to make test fast
    fs_kwargs['noise_gradient'] = 5
    fs_kwargs['initial_noise_sigma'] = 75
    fs_kwargs['n_samples_per_group'] = 1
    fs_kwargs['random_state'] = 42
    fs_kwargs['interpolatable'] = True
    fs_kwargs['singularity'] = False
    fs_kwargs['clip'] = False

    fs = CreateArtificialFeatureSpace_Discrete(**fs_kwargs)

    ss_kwargs = {}
    ss_kwargs['feature_space'] = fs
    ss_kwargs['quiet'] = False
    ss_kwargs['n_iter'] = n_iter = 10
    ss_kwargs['random_state'] = 42
    ss_kwargs[
        'conserve_mem'] = False # otherwise the input fs will be modified

    # Run once with the plain pipeline...
    FeatureSpaceClassificationExperiment.NumFeaturesGridSearch(**ss_kwargs)

    # ...and once with LDA + pre-LDA Fisher feature filtering
    ss_kwargs['lda'] = True
    ss_kwargs['pre_lda_feature_filter'] = True
    #import pdb; pdb.set_trace()
    FeatureSpaceClassificationExperiment.NumFeaturesGridSearch(**ss_kwargs)
def test_IfNotInterpolatable( self ):
    """You can't graph predicted values if the classes aren't interpolatable."""

    testfilename = 'ShouldntBeGraphable.png'
    # interpolatable=False -> class labels carry no numeric meaning
    small_fs = CreateArtificialFeatureSpace_Discrete( n_samples=20,
        n_classes=2, random_state=42, interpolatable=False )

    train_set, test_set = small_fs.Split( random_state=False, quiet=True )
    train_set.Normalize()
    fw = FisherFeatureWeights.NewFromFeatureSpace( train_set ).Threshold()
    reduced_train_set = train_set.FeatureReduce( fw )
    reduced_test_set = test_set.FeatureReduce( fw )
    # NOTE(review): test_set is normalized AFTER reduced_test_set was derived
    # from it — confirm whether the reduced set shares storage; the test only
    # asserts the ValueError below, so classification quality is irrelevant here.
    test_set.Normalize( train_set, quiet=True )

    batch_result = FeatureSpaceClassification.NewWND5( reduced_train_set,
        reduced_test_set, fw, quiet=True )

    # Non-interpolatable classes -> graphing predicted values must fail
    with self.assertRaises( ValueError ):
        graph = PredictedValuesGraph( batch_result )
def test_TiledTrainTestSplit(self): """Uses a fake FeatureSpace""" from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Discrete fs_kwargs = {} fs_kwargs['name'] = "DiscreteArtificialFS 10-class" fs_kwargs['n_samples'] = 1000 fs_kwargs['n_classes'] = 10 # 100 samples per class fs_kwargs['num_features_per_signal_type'] = 25 fs_kwargs['initial_noise_sigma'] = 40 fs_kwargs['noise_gradient'] = 20 fs_kwargs['n_samples_per_group'] = 4 # 25 images, 2x2 tiling scheme fs_kwargs['interpolatable'] = True fs_kwargs['random_state'] = 43 fs_kwargs['singularity'] = False fs_kwargs['clip'] = False fs = CreateArtificialFeatureSpace_Discrete(**fs_kwargs) train, test = fs.Split(random_state=False, quiet=True) train.Normalize(inplace=True, quiet=True) fw = FisherFeatureWeights.NewFromFeatureSpace(train).Threshold() train.FeatureReduce(fw, inplace=True) test.FeatureReduce(fw, inplace=True, quiet=True).Normalize(train, inplace=True, quiet=True) result = FeatureSpaceClassification.NewWND5(train, test, fw) result.Print() for class_name in result.test_set.class_names: try: self.assertEqual( result.similarity_matrix[class_name][class_name], float(1)) except: print "offending class: {0}, val: {1}".format( class_name, result.similarity_matrix[class_name][class_name]) raise
def test_PerSampleStatisticsWITHPredictedValue(self):
    """DISCRETE PerSampleStatistics with numeric predicted value"""

    # Interpolatable class labels -> classifier can emit numeric predictions
    fs_kwargs = {}
    fs_kwargs['name'] = "DISCRETE PerSampleStatistics WITH Pred Values"
    fs_kwargs['n_samples'] = n_samples = 40
    fs_kwargs['n_classes'] = 2
    fs_kwargs[
        'num_features_per_signal_type'] = 10 # small on purpose, to make test fast
    fs_kwargs['noise_gradient'] = 50
    fs_kwargs['initial_noise_sigma'] = 75
    fs_kwargs['n_samples_per_group'] = 1
    fs_kwargs['random_state'] = 42
    fs_kwargs['interpolatable'] = True
    fs_kwargs['singularity'] = False
    fs_kwargs['clip'] = False

    fs = CreateArtificialFeatureSpace_Discrete(**fs_kwargs)

    # Use case 1: Straight, classic WND-CHARM train/test splits
    ss_kwargs = {}
    ss_kwargs[
        'name'] = "Discrete PerSampleStatistics ShuffleSplit WITH Pred Values"
    ss_kwargs['quiet'] = True
    ss_kwargs['n_iter'] = n_iter = 10
    ss_kwargs['train_size'] = train_size = 8 # per-class
    ss_kwargs['test_size'] = test_size = 2 # per-class
    ss_kwargs['random_state'] = 42
    exp = FeatureSpaceClassificationExperiment.NewShuffleSplit(
        fs, **ss_kwargs)

    # Use case 2: Put LDA in pipeline (no fisher feature space prefilter, by default)
    ss_kwargs['lda'] = True
    exp = FeatureSpaceClassificationExperiment.NewShuffleSplit(
        fs, **ss_kwargs)

    ## Use case 3: LDA AND Fisher feature space prefilter
    #ss_kwargs['pre_lda_feature_filter'] = True
    #exp = FeatureSpaceClassificationExperiment.NewShuffleSplit( fs, **ss_kwargs )

    ## Use case 4: LDA AND Fisher feature space prefilter, AND post-LDA dimension reduction
    #ss_kwargs['lda_features_size'] = 0.5
    #exp = FeatureSpaceClassificationExperiment.NewShuffleSplit( fs, **ss_kwargs )

    # Print() internally generates the experiment stats first
    #from os import devnull
    exp.Print() #output_stream=devnull )
    exp.PerSampleStatistics() #output_stream=devnull )
    # Smoke test: reaching this point without raising is the pass condition
    self.assertTrue(True)
def test_PerSampleStatisticsWITHOUTPredictedValue(self): """DISCRETE ShuffleSplit/PerSampleStatistics w/ no predicted value""" # CAN'T USE THIS, SINCE THE CLASS NAMES ARE INTERPOLATABLE # 2-class, 10 samples per class #fs = FeatureSet_Discrete.NewFromFitFile( '../wndchrm_tests/test-l.fit' ) fs_kwargs = {} fs_kwargs['name'] = "DISCRETE PerSampleStatistics No Pred Values" fs_kwargs['n_samples'] = n_samples = 20 fs_kwargs['n_classes'] = 2 fs_kwargs[ 'num_features_per_signal_type'] = 10 # small on purpose, to make test fast fs_kwargs['noise_gradient'] = 50 fs_kwargs['initial_noise_sigma'] = 75 fs_kwargs['n_samples_per_group'] = 1 fs_kwargs['random_state'] = 42 fs_kwargs['interpolatable'] = False fs_kwargs['singularity'] = False fs_kwargs['clip'] = False fs = CreateArtificialFeatureSpace_Discrete(**fs_kwargs) ss_kwargs = {} ss_kwargs[ 'name'] = "Discrete PerSampleStatistics ShuffleSplit No Pred Values" ss_kwargs['quiet'] = True ss_kwargs['n_iter'] = n_iter = 1 ss_kwargs['train_size'] = train_size = 8 # per-class ss_kwargs['test_size'] = test_size = 2 # per-class ss_kwargs['random_state'] = 42 exp = FeatureSpaceClassificationExperiment.NewShuffleSplit( fs, **ss_kwargs) ss_kwargs['lda'] = True exp = FeatureSpaceClassificationExperiment.NewShuffleSplit( fs, **ss_kwargs) #Print calls self.GenereateStats() #from os import devnull exp.Print() #output_stream=devnull ) exp.PerSampleStatistics() #output_stream=devnull ) self.assertTrue(True)
def test_HyperparameterOptimizationGraph(self):
    """Accuracy vs. # features or samples with and without LDA feature space transform"""

    # NOTE(review): testfilename is currently unused here (no CompareGraphs
    # call) — this test passes if both grid searches complete without raising.
    testfilename = 'test_graph_rank_ordered_experiment.npy'

    # Make a smaller featureset to do multiple splits
    fs_kwargs = {}
    fs_kwargs['name'] = "DiscreteArtificialFS RANK ORDERED SHUFFLE SPLIT"
    fs_kwargs['n_samples'] = 100 # smaller
    fs_kwargs['n_classes'] = 5 # smaller, 20 samples per class
    fs_kwargs['num_features_per_signal_type'] = 10 # smaller
    fs_kwargs['initial_noise_sigma'] = 50
    fs_kwargs['noise_gradient'] = 20
    fs_kwargs['n_samples_per_group'] = 1
    fs_kwargs['interpolatable'] = True
    fs_kwargs['random_state'] = 42
    fs_kwargs['singularity'] = False
    fs_kwargs['clip'] = False

    small_fs = CreateArtificialFeatureSpace_Discrete(**fs_kwargs)

    ss_kwargs = {}
    ss_kwargs['quiet'] = False
    ss_kwargs['n_iter'] = n_iter = 10
    ss_kwargs['train_size'] = train_size = 18 # per-class
    ss_kwargs['test_size'] = test_size = 2 # per-class
    ss_kwargs['random_state'] = 42
    ss_kwargs['show_raw'] = True
    ss_kwargs['show_lda'] = True

    # Sweep 1: accuracy as a function of the number of features
    ss_kwargs['param'] = 'features'
    ss_kwargs['text_angle'] = -30
    graph = HyperparameterOptimizationGraph(small_fs)
    graph.GridSearch(**ss_kwargs)
    #graph.savefig( '/Users/colettace/test_features.png' )

    # Sweep 2: accuracy as a function of the number of samples
    ss_kwargs['param'] = 'samples'
    ss_kwargs['quiet'] = False
    ss_kwargs['text_angle'] = -30
    graph = HyperparameterOptimizationGraph(small_fs)
    graph.GridSearch(**ss_kwargs)
def test_SampleReduce( self ):
    """Exercise FeatureSpace.SampleReduce() in all eight combinations of
    {leave-in, leave-out} x {untiled, tiled} x {discrete, continuous},
    including argument dummyproofing."""
    from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Discrete

    n_classes = 10

    #========================================================
    # Section 1: LEAVE IN, Untiled Discrete (w/ classes) FeatureSpace instances
    fs_discrete = CreateArtificialFeatureSpace_Discrete( n_samples=1000,
        n_classes=n_classes, num_features_per_signal_type=30, noise_gradient=5,
        initial_noise_sigma=10, n_samples_per_group=1, interpolatable=True)

    # Reduce to 9 classes from 10, one sample per class
    # Drop the last class:
    desired = range(50, 950, 100)

    A = fs_discrete.SampleReduce( desired )

    # Further reduce to 8 classes
    A.RemoveClass( "FakeClass-055.6", inplace=True )

    correct_samplenames = ['FakeClass-100.0_050', 'FakeClass-077.8_050', 'FakeClass-033.3_050', 'FakeClass-011.1_050', 'FakeClass+011.1_050', 'FakeClass+033.3_050', 'FakeClass+055.6_050', 'FakeClass+077.8_050']
    #The actual alphanumeric sort order is different from the value sort order
    #correct_samplenames = ['FakeClass+011.1_050', 'FakeClass+033.3_050', 'FakeClass+055.6_050', 'FakeClass+077.8_050', 'FakeClass-011.1_050', 'FakeClass-033.3_050', 'FakeClass-077.8_050', 'FakeClass-100.0_050']
    self.assertEqual( correct_samplenames, A._contiguous_sample_names )

    correct_classnames = ['FakeClass-100.0', 'FakeClass-077.8', 'FakeClass-033.3', 'FakeClass-011.1', 'FakeClass+011.1', 'FakeClass+033.3', 'FakeClass+055.6', 'FakeClass+077.8' ]
    #correct_classnames = ['FakeClass+011.1', 'FakeClass+033.3', 'FakeClass+055.6', 'FakeClass+077.8', 'FakeClass-011.1', 'FakeClass-033.3', 'FakeClass-077.8', 'FakeClass-100.0']
    self.assertEqual( correct_classnames, A.class_names )
    del A

    #========================================================
    # Section 2: LEAVE OUT, UNTiled Feature sets, Discrete FeatureSpace instances
    UNdesired = range(50, 950, 100)
    C = fs_discrete.SampleReduce( leave_out_sample_group_ids=UNdesired )
    self.assertEqual( C.num_samples, fs_discrete.num_samples - len( UNdesired ) )

    # Single integers for leave_out_list is ok
    UNdesired = 50
    C = fs_discrete.SampleReduce( leave_out_sample_group_ids=UNdesired )
    self.assertEqual( C.num_samples, fs_discrete.num_samples - 1 )
    del C

    #========================================================
    # Section 3: LEAVE IN, Tiled Feature sets, Discrete FeatureSpace instances
    num_tiles = 4
    fs_discrete = CreateArtificialFeatureSpace_Discrete( n_samples=1000,
        n_classes=n_classes, num_features_per_signal_type=30, noise_gradient=5,
        initial_noise_sigma=10, n_samples_per_group=num_tiles, interpolatable=True)

    desired = range(5, 95, 10) # Rearrange into 9 classes
    D = fs_discrete.SampleReduce( desired )
    # Total num samples should be 9 classes, 1 sample group per class, 4 tiles per SG = 36
    self.assertEqual( num_tiles * len( desired ), D.num_samples )
    del D

    #========================================================
    # Section 4: LEAVE OUT, WITH Tiled Feature sets, Discrete FeatureSpace instances

    # You can't leave out a sample group that doesn't exist
    UNdesired = range(50000, 50010)
    self.assertRaises( ValueError, fs_discrete.SampleReduce,
        leave_out_sample_group_ids=UNdesired )

    # Can't leave out trash
    UNdesired = ['foo', 'bar']
    self.assertRaises( TypeError, fs_discrete.SampleReduce,
        leave_out_sample_group_ids=UNdesired )

    # This input is ok:
    UNdesired = range(5, 95, 10)
    E = fs_discrete.SampleReduce( leave_out_sample_group_ids=UNdesired )
    self.assertEqual( E.num_samples,
        fs_discrete.num_samples - len( UNdesired ) * num_tiles )
    del E

    #========================================================
    # Section 5: LEAVE IN, Untiled Continuous FeatureSpace instances
    from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Continuous
    fs_cont = CreateArtificialFeatureSpace_Continuous( n_samples=1000,
        num_features_per_signal_type=30, noise_gradient=5, initial_noise_sigma=10,
        n_samples_per_group=1)

    # dummyproof
    desired = ['foo', 'bar']
    self.assertRaises( TypeError, fs_cont.SampleReduce, desired )

    desired = range(50, 950)
    F = fs_cont.SampleReduce( desired )
    self.assertEqual( F.num_samples, len(desired) )
    del F

    #========================================================
    # Section 6: LEAVE OUT, Untiled Continuous FeatureSpace instances
    UNdesired = range(50, 950)
    G = fs_cont.SampleReduce( leave_out_sample_group_ids=UNdesired )
    self.assertEqual( G.num_samples, fs_cont.num_samples - len(UNdesired) )
    del G

    # single int is ok
    H = fs_cont.SampleReduce( leave_out_sample_group_ids=998 )
    self.assertEqual( H.num_samples, fs_cont.num_samples - 1 )
    del H

    #========================================================
    # Section 7: LEAVE IN, TILED Continuous FeatureSpace instances
    fs_cont = CreateArtificialFeatureSpace_Continuous( n_samples=1000,
        num_features_per_signal_type=30, noise_gradient=5, initial_noise_sigma=10,
        n_samples_per_group=num_tiles)

    desired = range(50, 95)
    I = fs_cont.SampleReduce( desired )
    self.assertEqual( I.num_samples, len(desired) * num_tiles )
    del I

    # single int is ok, ALTHOUGH NOT SURE WHY YOU'D EVER WANT A FS WITH A SINGLE SAMPLE
    J = fs_cont.SampleReduce( 98 )
    self.assertEqual( J.num_samples, num_tiles )
    del J

    #========================================================
    # Section 8: LEAVE OUT, TILED Continuous FeatureSpace instances
    UNdesired = range(50, 95)
    K = fs_cont.SampleReduce( leave_out_sample_group_ids=UNdesired )
    self.assertEqual( K.num_samples, fs_cont.num_samples - len(UNdesired) * num_tiles )
    del K

    # single int is ok
    L = fs_cont.SampleReduce( leave_out_sample_group_ids=98 )
    self.assertEqual( L.num_samples, fs_cont.num_samples - num_tiles )
    del L
class TestGraphs(unittest.TestCase):
    """Test WND-CHARM's graph-making functionality."""

    # -----------------------------------------------------------------
    # Class-level fixture, built once at class-definition time: a 10-class
    # artificial feature space put through a split -> normalize -> Fisher
    # reduce -> WND5 pipeline. Several tests below reuse batch_result.
    # -----------------------------------------------------------------
    fs_kwargs = {}
    fs_kwargs['name'] = "DiscreteArtificialFS 10-class"
    fs_kwargs['n_samples'] = 1000
    fs_kwargs['n_classes'] = 10
    fs_kwargs['num_features_per_signal_type'] = 25
    fs_kwargs['initial_noise_sigma'] = 40
    fs_kwargs['noise_gradient'] = 20
    fs_kwargs['n_samples_per_group'] = 1
    fs_kwargs['interpolatable'] = True
    fs_kwargs['random_state'] = 43
    fs_kwargs['singularity'] = False
    fs_kwargs['clip'] = False

    fs = CreateArtificialFeatureSpace_Discrete(**fs_kwargs)

    train_set, test_set = fs.Split(random_state=False, quiet=True)
    train_set.Normalize(quiet=True)
    fw = FisherFeatureWeights.NewFromFeatureSpace(train_set).Threshold()

    reduced_train_set = train_set.FeatureReduce(fw)
    reduced_test_set = test_set.FeatureReduce(fw)
    # Test set is normalized against the training set's statistics
    reduced_test_set.Normalize(reduced_train_set, quiet=True)

    batch_result = FeatureSpaceClassification.NewWND5(reduced_train_set,
        reduced_test_set, fw, quiet=True)

    def setUp(self):
        # Fresh temp dir per test; removed in tearDown
        self.tempdir = mkdtemp()

    def tearDown(self):
        rmtree(self.tempdir)

    def CompareGraphs(self, graph, testfilename):
        """Helper function to check output graphs"""
        # Uncoment to see what graph looks like!
        #graph.SaveToFile( testfilename + 'GRAPH.png' )

        # We used to output the graphs to a png file and do a binary diff on a reference png
        # but there are superficial differences between matplotlib versions that result in
        # the points still being in the right place, but the font is slightly larger,
        # or the text is subtlely offset. So now, we interrogate the matplotlib.figure
        # object and retrieve its coordinates and check them against blessed numpy arrays
        # saved to a npy file.
        axessubplot = graph.figure.gca()
        if len(axessubplot.lines) > 0:
            # line plot: stack each line's vertex array into one 3-D array
            try:
                all_coords = np.dstack(
                    tuple([ group._path._vertices for group in axessubplot.lines ]))
            except AttributeError:
                # older version of matplotlib didn't include leading underscore in attribute
                # "_vertices"
                all_coords = np.dstack(
                    tuple( [group._path.vertices for group in axessubplot.lines]))
        elif len(axessubplot.collections) > 0:
            # scatter plot: collections carry point offsets instead of paths
            all_coords = np.dstack(
                tuple([group._offsets for group in axessubplot.collections]))
        else:
            self.fail("Graph doesn't have any lines nor points")

        # uncomment to replace old coords
        #np.save( testfilename, all_coords )
        #from os.path import splitext
        #testfilename_base, ext = splitext( testfilename )
        #np.save( testfilename_base + 'NEW.npy', all_coords )

        reference_array = np.load(testfilename)
        # Exact match passes outright; otherwise tolerate float rounding
        # via allclose before declaring a mismatch.
        if not np.array_equal(all_coords, reference_array):
            if not np.allclose(all_coords, reference_array):
                errmsg = 'Reference graph "{0}" coordinates '.format(testfilename) + \
                    'do not concur with coordinates generated by this test.'
                self.fail(errmsg)

    @unittest.skipIf(HasMatplotlib, "Skipped if matplotlib IS installed")
    def test_ErrMsgIfMatplotibNotInstalled(self):
        """Fail gracefully with informative message if matplotlib"""
        # Graph construction succeeds; rendering must raise ImportError
        graph = PredictedValuesGraph(self.batch_result)
        with self.assertRaises(ImportError):
            graph.RankOrderedPredictedValuesGraph()
        with self.assertRaises(ImportError):
            graph.KernelSmoothedDensityGraph()

    @unittest.skipUnless(HasMatplotlib, "Skipped if matplotlib IS NOT installed")
    @unittest.expectedFailure
    def test_RankOrderedFromBatchClassificationResult(self):
        """Rank Ordered Predicted values graph from a single split"""
        testfilename = 'test_graph_rank_ordered_interpolated_discrete.npy'
        graph = PredictedValuesGraph(self.batch_result)
        graph.RankOrderedPredictedValuesGraph()
        self.CompareGraphs(graph, testfilename)

    @unittest.skipUnless(HasMatplotlib, "Skipped if matplotlib IS NOT installed")
    @unittest.expectedFailure
    def test_KernelSmoothedFromBatchClassificationResult(self):
        """Kernel Smoothed Probability density graph from a single split"""
        testfilename = 'test_graph_kernel_smoothed.npy'
        graph = PredictedValuesGraph(self.batch_result)
        graph.KernelSmoothedDensityGraph()
        self.CompareGraphs(graph, testfilename)

    @unittest.skipUnless(HasMatplotlib, "Skipped if matplotlib IS NOT installed")
    def test_FromDiscreteClassificationExperimentResults(self):
        """Rank Ordered Predicted values graph from an experiment result (multiple splits)"""
        testfilename = 'test_graph_rank_ordered_experiment.npy'

        # Make a smaller featureset to do multiple splits
        fs_kwargs = {}
        fs_kwargs['name'] = "DiscreteArtificialFS RANK ORDERED SHUFFLE SPLIT"
        fs_kwargs['n_samples'] = 100 # smaller
        fs_kwargs['n_classes'] = 5 # smaller, 20 samples per class
        fs_kwargs['num_features_per_signal_type'] = 10 # smaller
        fs_kwargs['initial_noise_sigma'] = 50
        fs_kwargs['noise_gradient'] = 20
        fs_kwargs['n_samples_per_group'] = 1
        fs_kwargs['interpolatable'] = True
        fs_kwargs['random_state'] = 42
        fs_kwargs['singularity'] = False
        fs_kwargs['clip'] = False

        small_fs = CreateArtificialFeatureSpace_Discrete(**fs_kwargs)

        ss_kwargs = {}
        ss_kwargs['quiet'] = True
        ss_kwargs['n_iter'] = n_iter = 10
        ss_kwargs['train_size'] = train_size = 18 # per-class
        ss_kwargs['test_size'] = test_size = 2 # per-class
        ss_kwargs['random_state'] = 42
        exp = FeatureSpaceClassificationExperiment.NewShuffleSplit(
            small_fs, **ss_kwargs)

        # Per-split (not averaged) results feed the rank-ordered graph
        graph = PredictedValuesGraph(exp, use_averaged_results=False)
        graph.RankOrderedPredictedValuesGraph()
        self.CompareGraphs(graph, testfilename)

    @unittest.skipUnless(HasMatplotlib, "Skipped if matplotlib IS NOT installed")
    def test_HyperparameterOptimizationGraph(self):
        """Accuracy vs. # features or samples with and without LDA feature space transform"""
        # NOTE(review): testfilename is unused in this test (no CompareGraphs);
        # the test passes if both grid searches complete without raising.
        testfilename = 'test_graph_rank_ordered_experiment.npy'

        # Make a smaller featureset to do multiple splits
        fs_kwargs = {}
        fs_kwargs['name'] = "DiscreteArtificialFS RANK ORDERED SHUFFLE SPLIT"
        fs_kwargs['n_samples'] = 100 # smaller
        fs_kwargs['n_classes'] = 5 # smaller, 20 samples per class
        fs_kwargs['num_features_per_signal_type'] = 10 # smaller
        fs_kwargs['initial_noise_sigma'] = 50
        fs_kwargs['noise_gradient'] = 20
        fs_kwargs['n_samples_per_group'] = 1
        fs_kwargs['interpolatable'] = True
        fs_kwargs['random_state'] = 42
        fs_kwargs['singularity'] = False
        fs_kwargs['clip'] = False

        small_fs = CreateArtificialFeatureSpace_Discrete(**fs_kwargs)

        ss_kwargs = {}
        ss_kwargs['quiet'] = False
        ss_kwargs['n_iter'] = n_iter = 10
        ss_kwargs['train_size'] = train_size = 18 # per-class
        ss_kwargs['test_size'] = test_size = 2 # per-class
        ss_kwargs['random_state'] = 42
        ss_kwargs['show_raw'] = True
        ss_kwargs['show_lda'] = True

        # Sweep 1: accuracy as a function of the number of features
        ss_kwargs['param'] = 'features'
        ss_kwargs['text_angle'] = -30
        graph = HyperparameterOptimizationGraph(small_fs)
        graph.GridSearch(**ss_kwargs)
        #graph.savefig( '/Users/colettace/test_features.png' )

        # Sweep 2: accuracy as a function of the number of samples
        ss_kwargs['param'] = 'samples'
        ss_kwargs['quiet'] = False
        ss_kwargs['text_angle'] = -30
        graph = HyperparameterOptimizationGraph(small_fs)
        graph.GridSearch(**ss_kwargs)
        #graph.savefig( '/Users/colettace/test_samples.png' )

    @unittest.skipUnless(HasMatplotlib, "Skipped if matplotlib IS NOT installed")
    def test_FromHTML(self):
        """Rank Ordered Predicted values graph from an experiment result (multiple splits)"""
        testfilename = 'test_graph_fromHTML.npy'

        # Inflate the zipped html file into a temp file
        import zipfile
        # (One-time recipe used to create the zipped fixture:)
        #zipped_file_path = pychrm_test_dir + sep + 'c_elegans_terminal_bulb.html'
        #import zlib
        #zf = zipfile.ZipFile( zipped_file_path + '.zip', mode='w' )
        #zf.write( zipped_file_path, compress_type=zipfile.ZIP_DEFLATED )
        #zf.close()
        zipped_file_path = pychrm_test_dir + sep + 'c_elegans_terminal_bulb.html.zip'
        zf = zipfile.ZipFile(zipped_file_path, mode='r')
        zf.extractall(self.tempdir)
        htmlfilepath = self.tempdir + sep + zf.namelist()[0]

        # Rebuild the experiment result from the archived HTML report
        graph = PredictedValuesGraph.NewFromHTMLReport(
            htmlfilepath, use_averaged_results=False)
        graph.RankOrderedPredictedValuesGraph()
        self.CompareGraphs(graph, testfilename)

    @unittest.skipUnless(HasMatplotlib, "Skipped if matplotlib IS NOTinstalled")
    def test_IfNotInterpolatable(self):
        """You can't graph predicted values if the classes aren't interpolatable."""
        testfilename = 'ShouldntBeGraphable.png'
        # interpolatable=False -> class labels carry no numeric meaning
        small_fs = CreateArtificialFeatureSpace_Discrete(n_samples=20,
            n_classes=2, random_state=42, interpolatable=False)

        train_set, test_set = small_fs.Split(random_state=False, quiet=True)
        train_set.Normalize()
        fw = FisherFeatureWeights.NewFromFeatureSpace(train_set).Threshold()
        reduced_train_set = train_set.FeatureReduce(fw)
        reduced_test_set = test_set.FeatureReduce(fw)
        # NOTE(review): test_set is normalized AFTER reduced_test_set was
        # derived from it — only the ValueError below matters to this test.
        test_set.Normalize(train_set, quiet=True)

        batch_result = FeatureSpaceClassification.NewWND5(reduced_train_set,
            reduced_test_set, fw, quiet=True)

        # Non-interpolatable classes -> graphing predicted values must fail
        with self.assertRaises(ValueError):
            graph = PredictedValuesGraph(batch_result)