示例#1
0
	def test_12_crossValidate_svmlight_file_RegExample(self):
		##############################
		print 'Running unittests for this project: ', project_name
		print 'Running this unittest: ', self._testMethodName
		##################################
		
		do_not_to_delete = glob.glob(r'%s\*' % current_dir)
		
		from ml_functions import crossValidate_svmlight_file
		
		no_folds = 2
		no_reps = 3
		
		for do_strat_cv in [False]: #c.f. generate_modelling_input.py for regression datasets
			partitionedFiles = crossValidate_svmlight_file(svmlight_file=r'%s\toy12_complete_svmlight_file.txt' % current_dir,output_dir=current_dir,mccv=False,folds=no_folds,perc_test=0.2,repetitions=no_reps,stratified=do_strat_cv,only_write_IDs=False)
			
			f_out = open(r'%s\out_file_record_s%s.txt' % (current_dir,str(do_strat_cv)),'wb')
			try:
				for rep in range(1,(no_reps+1)):
					for FOLD in range(1,(no_folds+1)):
						for subset in ['TRAIN','TEST']:
							f_out.write("rep=%d;FOLD=%d;subset=%s;file=%s\r\n" % (rep,FOLD,subset,os.path.relpath(partitionedFiles[rep][FOLD][subset],start=current_dir)))
			finally:
				f_out.close()
				del f_out
		
		#N.B. All files produced from an initial run were cross-referenced against toy12_complete_svmlight_file.txt to see if they met expectations (train/test must not overlap [inspected for one repetition]; correct number of files; test sets must not overlap [inspected for one repetition]; test and train must be equal size i.e. 3 = 6(total)/2(folds)[inspected for one repetition]; different repetitions of CV should not be identical; train+test lines must match toy12_complete_svmlight_file.txt lines[inspected for one repetition for both strat=TRUE/FALSE]);out_file_record_s%s.txt checked for internal consistency=><DONE:OK>
		#- then, these files were renamed ..._Expected.txt
		self.compareAllExpectedAndActualFiles(current_dir)
		
		self.clean_up_if_all_checks_passed(current_dir,specific_files_not_to_delete=do_not_to_delete)
	def apply_crossValidate_svmlight_file_function_to_contrived_input_using_different_settings(self,svmlight_file):
		"""Run crossValidate_svmlight_file on svmlight_file under each non-stratified trial setting.
		
		Every trial uses perc_test=0.2, repetitions=2 and stratified=False; the trials differ in
		mccv / folds (mccv=True with folds=1, or mccv=False with folds=5) and in only_write_IDs.
		Output is written to the directory containing this module.
		
		Arguments:
			svmlight_file: passed unchanged as the first argument of crossValidate_svmlight_file.
		
		Returns:
			dict mapping each trial name to the value returned by crossValidate_svmlight_file for that trial.
		"""
		#####################################################################################
		#N.B. When all regression y-values made different, the use of stratified=True led to error (scikit-learn version 0.13)!
		#=> "ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of labels for any class cannot be less than 2."
		#Hence only the stratified=False settings are exercised here.
		######################################################################################
		
		#os.path.dirname replaces manual splitting on '\\', which only gave the module directory on Windows.
		output_dir = os.path.dirname(os.path.abspath(__file__))
		
		#(trial name, mccv, folds, only_write_IDs) - listed in the order in which the trials are run.
		trial_settings = [
			('trial_nonstratified_mccv_keepEverything_partitionedFiles',True,1,False),
			('trial_nonstratified_mccv_keepIDs_partitionedFiles',True,1,True),
			('trial_nonstratified_kfoldcv_keepEverything_partitionedFiles',False,5,False),
			('trial_nonstratified_kfoldcv_keepIDs_partitionedFiles',False,5,True),
		]
		
		aTPF = {}
		
		for trial_name,use_mccv,no_folds,ids_only in trial_settings:
			aTPF[trial_name] = crossValidate_svmlight_file(svmlight_file,output_dir=output_dir,mccv=use_mccv,folds=no_folds,perc_test=0.2,repetitions=2,stratified=False,only_write_IDs=ids_only)
		
		return aTPF
	def apply_crossValidate_svmlight_file_function_to_contrived_input_using_different_settings(self,svmlight_file):
		"""Run crossValidate_svmlight_file on svmlight_file under all eight trial settings.
		
		Every trial uses perc_test=0.2 and repetitions=2; the trials cover each combination of
		stratified (True/False), mccv / folds (mccv=True with folds=1, or mccv=False with folds=5)
		and only_write_IDs (False/True). Output is written to the directory containing this module.
		
		Arguments:
			svmlight_file: passed unchanged as the first argument of crossValidate_svmlight_file.
		
		Returns:
			dict mapping each trial name to the value returned by crossValidate_svmlight_file for that trial.
		"""
		#<DONE>: d.i.a.f.ok.r (including w.r.t. argument specification and reporting sections at start of def crossValidate_svmlight_file(...):)
		
		#os.path.dirname replaces manual splitting on '\\', which only gave the module directory on Windows.
		output_dir = os.path.dirname(os.path.abspath(__file__))
		
		#(trial name, mccv, folds, stratified, only_write_IDs) - listed in the order in which the trials are run.
		trial_settings = [
			('trial_stratified_mccv_keepEverything_partitionedFiles',True,1,True,False),
			('trial_nonstratified_mccv_keepEverything_partitionedFiles',True,1,False,False),
			('trial_stratified_mccv_keepIDs_partitionedFiles',True,1,True,True),
			('trial_nonstratified_mccv_keepIDs_partitionedFiles',True,1,False,True),
			('trial_stratified_kfoldcv_keepEverything_partitionedFiles',False,5,True,False),
			('trial_nonstratified_kfoldcv_keepEverything_partitionedFiles',False,5,False,False),
			('trial_stratified_kfoldcv_keepIDs_partitionedFiles',False,5,True,True),
			('trial_nonstratified_kfoldcv_keepIDs_partitionedFiles',False,5,False,True),
		]
		
		aTPF = {}
		
		for trial_name,use_mccv,no_folds,do_strat,ids_only in trial_settings:
			aTPF[trial_name] = crossValidate_svmlight_file(svmlight_file,output_dir=output_dir,mccv=use_mccv,folds=no_folds,perc_test=0.2,repetitions=2,stratified=do_strat,only_write_IDs=ids_only)
		
		return aTPF
示例#4
0
    def apply_crossValidate_svmlight_file_function_to_contrived_input_using_different_settings(
            self, svmlight_file):
        """Run crossValidate_svmlight_file on svmlight_file under each non-stratified trial setting.

        Every trial uses perc_test=0.2, repetitions=2 and stratified=False; the
        trials differ in mccv / folds (mccv=True with folds=1, or mccv=False
        with folds=5) and in only_write_IDs. Output is written to the
        directory containing this module.

        Arguments:
            svmlight_file: passed unchanged as the first argument of
                crossValidate_svmlight_file.

        Returns:
            dict mapping each trial name to the value returned by
            crossValidate_svmlight_file for that trial.
        """
        #####################################################################################
        #N.B. When all regression y-values made different, the use of stratified=True led to error (scikit-learn version 0.13)!
        #=> "ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of labels for any class cannot be less than 2."
        #Hence only the stratified=False settings are exercised here.
        ######################################################################################

        # os.path.dirname replaces manual splitting on '\\', which only gave
        # the module directory on Windows.
        output_dir = os.path.dirname(os.path.abspath(__file__))

        # (trial name, mccv, folds, only_write_IDs) - listed in the order in
        # which the trials are run.
        trial_settings = [
            ('trial_nonstratified_mccv_keepEverything_partitionedFiles',
             True, 1, False),
            ('trial_nonstratified_mccv_keepIDs_partitionedFiles',
             True, 1, True),
            ('trial_nonstratified_kfoldcv_keepEverything_partitionedFiles',
             False, 5, False),
            ('trial_nonstratified_kfoldcv_keepIDs_partitionedFiles',
             False, 5, True),
        ]

        aTPF = {}

        for trial_name, use_mccv, no_folds, ids_only in trial_settings:
            aTPF[trial_name] = crossValidate_svmlight_file(
                svmlight_file,
                output_dir=output_dir,
                mccv=use_mccv,
                folds=no_folds,
                perc_test=0.2,
                repetitions=2,
                stratified=False,
                only_write_IDs=ids_only)

        return aTPF
示例#5
0
    def apply_crossValidate_svmlight_file_function_to_contrived_input_using_different_settings(
            self, svmlight_file):
        """Run crossValidate_svmlight_file on svmlight_file under all eight trial settings.

        Every trial uses perc_test=0.2 and repetitions=2; the trials cover each
        combination of stratified (True/False), mccv / folds (mccv=True with
        folds=1, or mccv=False with folds=5) and only_write_IDs (False/True).
        Output is written to the directory containing this module.

        Arguments:
            svmlight_file: passed unchanged as the first argument of
                crossValidate_svmlight_file.

        Returns:
            dict mapping each trial name to the value returned by
            crossValidate_svmlight_file for that trial.
        """
        #<DONE>: d.i.a.f.ok.r (including w.r.t. argument specification and reporting sections at start of def crossValidate_svmlight_file(...):)

        # os.path.dirname replaces manual splitting on '\\', which only gave
        # the module directory on Windows.
        output_dir = os.path.dirname(os.path.abspath(__file__))

        # (trial name, mccv, folds, stratified, only_write_IDs) - listed in
        # the order in which the trials are run.
        trial_settings = [
            ('trial_stratified_mccv_keepEverything_partitionedFiles',
             True, 1, True, False),
            ('trial_nonstratified_mccv_keepEverything_partitionedFiles',
             True, 1, False, False),
            ('trial_stratified_mccv_keepIDs_partitionedFiles',
             True, 1, True, True),
            ('trial_nonstratified_mccv_keepIDs_partitionedFiles',
             True, 1, False, True),
            ('trial_stratified_kfoldcv_keepEverything_partitionedFiles',
             False, 5, True, False),
            ('trial_nonstratified_kfoldcv_keepEverything_partitionedFiles',
             False, 5, False, False),
            ('trial_stratified_kfoldcv_keepIDs_partitionedFiles',
             False, 5, True, True),
            ('trial_nonstratified_kfoldcv_keepIDs_partitionedFiles',
             False, 5, False, True),
        ]

        aTPF = {}

        for trial_name, use_mccv, no_folds, do_strat, ids_only in trial_settings:
            aTPF[trial_name] = crossValidate_svmlight_file(
                svmlight_file,
                output_dir=output_dir,
                mccv=use_mccv,
                folds=no_folds,
                perc_test=0.2,
                repetitions=2,
                stratified=do_strat,
                only_write_IDs=ids_only)

        return aTPF