def test_NewShuffleSplitLeastSquares(self):
    """CONTINUOUS SHUFFLE SPLIT LEAST SQUARES"""

    # Synthesize a continuous artificial feature space: features are known
    # functions of ground truth with noise that grows along the gradient.
    fs_kwargs = {}
    fs_kwargs['name'] = "CONTINUOUS PerSampleStatistics_TESTFS"
    fs_kwargs['n_samples'] = 100
    fs_kwargs['num_features_per_signal_type'] = 5
    fs_kwargs['initial_noise_sigma'] = 5
    fs_kwargs['noise_gradient'] = 5
    fs_kwargs['n_samples_per_group'] = 1
    fs_kwargs['random_state'] = 43
    fs_kwargs['singularity'] = True
    fs_kwargs['clip'] = True
    fs = CreateArtificialFeatureSpace_Continuous(**fs_kwargs)

    ss_kwargs = {}
    ss_kwargs['n_iter'] = 5
    ss_kwargs['name'] = "Continuous Shuffle Split Least Squares POSITIVE CONTROL"
    ss_kwargs['quiet'] = True
    ss_kwargs['random_state'] = 43
    exp = FeatureSpaceRegressionExperiment.NewShuffleSplit(fs, **ss_kwargs)
    exp.GenerateStats()
    #exp.Print()

    # len( exp ) is supposed to be the number of batch results (split results).
    # BUGFIX: assertEqual, not assertIs -- identity comparison of ints only
    # happens to pass because CPython caches small integers; equality is the
    # contract being tested here.
    self.assertEqual(len(exp), ss_kwargs['n_iter'])

    # Positive control - Artificial data with defaults should correlate almost perfectly
    self.assertAlmostEqual(exp.pearson_coeff, 1.0, delta=0.02)

    # Negative control - take the bottom quintile of the artificial features
    # which ARE functions of ground truth but should score low on linear correlation,
    # e.g., sin, x^2, etc.
    # With LSTSQ regression of noise features, pearson coeffs tend to be around -0.34 +/- .045
    max_allowable_pearson_coeff = 0.4

    temp_normalized_fs = fs.Normalize(inplace=False)
    ranked_nonzero_features = \
        PearsonFeatureWeights.NewFromFeatureSpace(
            temp_normalized_fs).Threshold(_all='nonzero')
    quintile = int(len(ranked_nonzero_features) / 5)
    # Bottom (worst-correlated) fifth of the nonzero-weight features
    crappy_features = ranked_nonzero_features.Slice(
        quintile * 4, len(ranked_nonzero_features))
    #crappy_features.Print()
    crap_featureset = fs.FeatureReduce(crappy_features, inplace=False)

    ss_kwargs['name'] = "Continuous Shuffle Split Least Squares NEGATIVE CONTROL"
    exp = FeatureSpaceRegressionExperiment.NewShuffleSplit(
        crap_featureset, **ss_kwargs)
    exp.GenerateStats()
    exp.PerSampleStatistics()
    #exp.Print()
    self.assertAlmostEqual(exp.pearson_coeff, 0.0,
                           delta=max_allowable_pearson_coeff)
def test_ContinuousFitOnFit(self):
    """Round-trip a discrete artificial feature space through a .fit file,
    reload it as continuous, and regress it against itself."""
    from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Discrete

    discrete_fs = CreateArtificialFeatureSpace_Discrete(
        n_samples=1000,
        n_classes=10,
        num_features_per_signal_type=30,
        noise_gradient=5,
        initial_noise_sigma=10,
        n_samples_per_group=1,
        interpolatable=True)

    workdir = mkdtemp()
    fit_path = workdir + sep + 'Artificial.fit'
    try:
        discrete_fs.ToFitFile(fit_path)
        # Reload the same samples, but interpreted as continuous ground truth.
        continuous_fs = FeatureSpace.NewFromFitFile(fit_path, discrete=False)
        continuous_fs.Normalize(quiet=True)
        weights = PearsonFeatureWeights.NewFromFeatureSpace(continuous_fs).Threshold()
        reduced = continuous_fs.FeatureReduce(weights)
        batch_result = FeatureSpaceRegression.NewMultivariateLinear(
            reduced, weights, quiet=True)
    finally:
        # Always clean up the scratch directory, even on assertion failure.
        rmtree(workdir)
def test_ContinuousTrainTestSplitWithTiling(self):
    """Uses a synthetic preprocessed as follows: 500 total samples, 25 tiles per group
    240 total features"""
    from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Continuous

    tiled_fs = CreateArtificialFeatureSpace_Continuous(
        n_samples=500,
        num_features_per_signal_type=20,
        n_samples_per_group=25)

    from numpy.random import RandomState
    prng = RandomState(42)

    #tiled_fs.Print( verbose=True )
    #print "\n\n\n********************\n\n\n"

    train_set, test_set = tiled_fs.Split(random_state=prng, quiet=True)
    #full_train.Print( verbose=True )
    #full_test.Print( verbose=True )

    # Normalize the training set, rank features on it, then project both
    # splits down to the surviving features; the test set is normalized
    # against the training set's ranges.
    train_set.Normalize(inplace=True, quiet=True)
    weights = PearsonFeatureWeights.NewFromFeatureSpace(train_set).Threshold()
    train_set.FeatureReduce(weights, inplace=True)
    test_set.FeatureReduce(weights, inplace=True).Normalize(
        train_set, inplace=True, quiet=True)
def test_NewShuffleSplitLinearMultivariateRegression(self):
    """CONTINUOUS SHUFFLE SPLIT LINEAR MULTIVARIATE METHOD"""

    # Synthesize a continuous artificial feature space: features are known
    # functions of ground truth with noise that grows along the gradient.
    fs_kwargs = {}
    fs_kwargs['name'] = "CONTINUOUS PerSampleStatistics_TESTFS"
    fs_kwargs['n_samples'] = 100
    fs_kwargs['num_features_per_signal_type'] = 5
    fs_kwargs['initial_noise_sigma'] = 5
    fs_kwargs['noise_gradient'] = 5
    fs_kwargs['n_samples_per_group'] = 1
    fs_kwargs['random_state'] = 43
    fs_kwargs['singularity'] = True
    fs_kwargs['clip'] = False
    fs = CreateArtificialFeatureSpace_Continuous(**fs_kwargs)

    ss_kwargs = {}
    ss_kwargs['n_iter'] = 5
    ss_kwargs['name'] = "Continuous Shuffle Split Multivariate-Regression POSITIVE CONTROL"
    ss_kwargs['quiet'] = True
    ss_kwargs['random_state'] = 43
    ss_kwargs['classifier'] = 'linear'
    exp = FeatureSpaceRegressionExperiment.NewShuffleSplit(fs, **ss_kwargs)
    exp.GenerateStats()
    #exp.Print()

    # BUGFIX: assertEqual, not assertIs -- identity comparison of ints only
    # happens to pass because CPython caches small integers; equality is the
    # contract being tested here.
    self.assertEqual(len(exp), ss_kwargs['n_iter'])

    # Positive control - Artificial data with defaults should correlate almost perfectly
    self.assertAlmostEqual(exp.pearson_coeff, 1.0, delta=0.03)

    # Negative control - take the bottom quintile of the artificial features
    # which ARE functions of ground truth but should score low on linear correlation,
    # e.g., sin, x^2, etc.
    # Voting method with crap features tends to be around 0.14 +/- 0.04
    max_allowable_pearson_coeff = 0.2

    temp_normalized_fs = fs.Normalize(inplace=False)
    ranked_nonzero_features = \
        PearsonFeatureWeights.NewFromFeatureSpace(
            temp_normalized_fs).Threshold(_all='nonzero')
    quintile = int(len(ranked_nonzero_features) / 5)
    # Bottom (worst-correlated) fifth of the nonzero-weight features
    crappy_features = ranked_nonzero_features[quintile * 4:len(ranked_nonzero_features)]
    #crappy_features.Print()
    crap_featureset = fs.FeatureReduce(crappy_features)

    # BUGFIX: the original assignment ended with a stray trailing comma,
    # which made ss_kwargs['name'] a 1-tuple instead of the intended string.
    ss_kwargs['name'] = "Continuous Shuffle Split Linear Multivariate-Regression NEGATIVE CONTROL"
    exp = FeatureSpaceRegressionExperiment.NewShuffleSplit(
        crap_featureset, **ss_kwargs)
    exp.GenerateStats()
    #exp.Print()
    self.assertAlmostEqual(exp.pearson_coeff, 0.0,
                           delta=max_allowable_pearson_coeff)
def test_MultivariateLinearFitOnFitNoTiling(self):
    """Smoke test: multivariate-linear regression of a feature space
    against itself (no tiling, no train/test split)."""
    cont_fs = CreateArtificialFeatureSpace_Continuous(
        n_samples=100,
        num_features_per_signal_type=5,
        noise_gradient=5,
        initial_noise_sigma=10,
        n_samples_per_group=1)

    # Normalize in place, rank the features, and keep only those that
    # survive the default threshold before regressing.
    cont_fs.Normalize(quiet=True)
    weights = PearsonFeatureWeights.NewFromFeatureSpace(cont_fs).Threshold()
    reduced = cont_fs.FeatureReduce(weights)
    batch_result = FeatureSpaceRegression.NewMultivariateLinear(
        test_set=reduced, feature_weights=weights, quiet=True)
def test_LeastSquaresFitOnFitLeaveOneOutNoTiling(self):
    """Smoke test: least-squares fit-on-fit with leave-one-out
    (test_set=None), no tiling."""
    cont_fs = CreateArtificialFeatureSpace_Continuous(
        n_samples=100,
        num_features_per_signal_type=5,
        noise_gradient=5,
        initial_noise_sigma=10,
        n_samples_per_group=1)

    # Rank features on a normalized COPY, but reduce the ORIGINAL
    # (un-normalized) space with those weights.
    # NOTE(review): presumably NewLeastSquares normalizes its training set
    # internally, which is why the raw space is reduced here -- confirm
    # against FeatureSpaceRegression.NewLeastSquares.
    normalized_copy = cont_fs.Normalize(inplace=False, quiet=True)
    weights = PearsonFeatureWeights.NewFromFeatureSpace(normalized_copy).Threshold()
    reduced = cont_fs.FeatureReduce(weights)
    batch_result = FeatureSpaceRegression.NewLeastSquares(
        training_set=reduced,
        test_set=None,
        feature_weights=weights,
        quiet=True)