def test_12_crossValidate_svmlight_file_RegExample(self):
    # Purpose: run crossValidate_svmlight_file in non-stratified k-fold mode
    # (2 folds, 3 repetitions) on toy12_complete_svmlight_file.txt, write a
    # record of which file was produced for every repetition/fold/subset, and
    # compare everything produced against the stored ..._Expected.txt files.
    #
    # (Comments are used instead of a docstring so that unittest's verbose
    # test description is left unchanged.)
    ##############################
    # Single-argument print calls: same text as the former Python 2 print
    # statements (which put one separator space between their two items).
    print('Running unittests for this project:  %s' % (project_name,))
    print('Running this unittest:  %s' % (self._testMethodName,))
    ##################################

    # Snapshot of everything already present in the test directory; it is
    # handed to the clean-up step below as the set of files to keep.
    do_not_to_delete = glob.glob(os.path.join(current_dir, '*'))

    from ml_functions import crossValidate_svmlight_file

    no_folds = 2
    no_reps = 3

    for do_strat_cv in [False]:  #c.f. generate_modelling_input.py for regression datasets
        partitionedFiles = crossValidate_svmlight_file(
            svmlight_file=os.path.join(current_dir, 'toy12_complete_svmlight_file.txt'),
            output_dir=current_dir,
            mccv=False,
            folds=no_folds,
            perc_test=0.2,
            repetitions=no_reps,
            stratified=do_strat_cv,
            only_write_IDs=False)

        record_file = os.path.join(current_dir, 'out_file_record_s%s.txt' % str(do_strat_cv))

        # Binary mode is kept from the original; presumably so that the
        # explicit "\r\n" line endings are written exactly as given.
        with open(record_file, 'wb') as f_out:
            for rep in range(1, no_reps + 1):
                for FOLD in range(1, no_folds + 1):
                    for subset in ['TRAIN', 'TEST']:
                        # Paths are recorded relative to current_dir so the
                        # record does not depend on where the project lives.
                        f_out.write("rep=%d;FOLD=%d;subset=%s;file=%s\r\n" % (
                            rep,
                            FOLD,
                            subset,
                            os.path.relpath(partitionedFiles[rep][FOLD][subset], start=current_dir)))

    #N.B. All files produced from an initial run were cross-referenced against
    #toy12_complete_svmlight_file.txt to see if they met expectations:
    # - train/test must not overlap [inspected for one repetition];
    # - correct number of files;
    # - test sets must not overlap [inspected for one repetition];
    # - test and train must be equal size i.e. 3 = 6(total)/2(folds) [inspected for one repetition];
    # - different repetitions of CV should not be identical;
    # - train+test lines must match toy12_complete_svmlight_file.txt lines
    #   [inspected for one repetition for both strat=TRUE/FALSE];
    # - out_file_record_s%s.txt checked for internal consistency
    #=><DONE:OK>
    #- then, these files were renamed ..._Expected.txt
    self.compareAllExpectedAndActualFiles(current_dir)
    self.clean_up_if_all_checks_passed(current_dir, specific_files_not_to_delete=do_not_to_delete)
def apply_crossValidate_svmlight_file_function_to_contrived_input_using_different_settings(self, svmlight_file):
    """Run crossValidate_svmlight_file on one input file under several settings.

    Every trial uses perc_test=0.2, repetitions=2 and stratified=False, and
    writes into the directory that contains this source file. The trials
    differ in mccv/folds (mccv=True with folds=1, or mccv=False with folds=5)
    and in only_write_IDs.

    Args:
        svmlight_file: passed through unchanged as the first positional
            argument of crossValidate_svmlight_file.

    Returns:
        dict mapping each trial name to the value returned by
        crossValidate_svmlight_file for that trial.
    """
    #####################################################################################
    #N.B. When all regression y-values made different, the use of stratified=True led to error (scikit-learn version 0.13)!
    #=> "ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of labels for any class cannot be less than 2."
    #Hence the four stratified counterparts of the trials below (same names,
    #with 'stratified' in place of 'nonstratified') are deliberately not run here.
    ######################################################################################

    # Directory containing this file. os.path.dirname replaces the former
    # split-on-backslash/join, which returned '' whenever the absolute path
    # contained no backslashes. Computed once: it is the same for every trial.
    output_dir = os.path.dirname(os.path.abspath(__file__))

    # (trial name, mccv, folds, only_write_IDs) - listed in execution order.
    trial_settings = [
        ('trial_nonstratified_mccv_keepEverything_partitionedFiles', True, 1, False),
        ('trial_nonstratified_mccv_keepIDs_partitionedFiles', True, 1, True),
        ('trial_nonstratified_kfoldcv_keepEverything_partitionedFiles', False, 5, False),
        ('trial_nonstratified_kfoldcv_keepIDs_partitionedFiles', False, 5, True),
    ]

    aTPF = {}
    for trial_name, use_mccv, no_folds, ids_only in trial_settings:
        aTPF[trial_name] = crossValidate_svmlight_file(
            svmlight_file,
            output_dir=output_dir,
            mccv=use_mccv,
            folds=no_folds,
            perc_test=0.2,
            repetitions=2,
            stratified=False,
            only_write_IDs=ids_only)
    return aTPF
def apply_crossValidate_svmlight_file_function_to_contrived_input_using_different_settings(self, svmlight_file):
    """Run crossValidate_svmlight_file on one input file under eight settings.

    Every trial uses perc_test=0.2 and repetitions=2 and writes into the
    directory that contains this source file. The trials cover every
    combination of mccv/folds (mccv=True with folds=1, or mccv=False with
    folds=5), stratified (True/False) and only_write_IDs (False/True).

    Args:
        svmlight_file: passed through unchanged as the first positional
            argument of crossValidate_svmlight_file.

    Returns:
        dict mapping each trial name to the value returned by
        crossValidate_svmlight_file for that trial.
    """
    #<DONE>: d.i.a.f.ok.r (including w.r.t. argument specification and reporting sections at start of def crossValidate_svmlight_file(...):)

    # Directory containing this file. os.path.dirname replaces the former
    # split-on-backslash/join, which returned '' whenever the absolute path
    # contained no backslashes. Computed once: it is the same for every trial.
    output_dir = os.path.dirname(os.path.abspath(__file__))

    # (trial name, mccv, folds, stratified, only_write_IDs) - listed in
    # execution order.
    trial_settings = [
        ('trial_stratified_mccv_keepEverything_partitionedFiles', True, 1, True, False),
        ('trial_nonstratified_mccv_keepEverything_partitionedFiles', True, 1, False, False),
        ('trial_stratified_mccv_keepIDs_partitionedFiles', True, 1, True, True),
        ('trial_nonstratified_mccv_keepIDs_partitionedFiles', True, 1, False, True),
        ('trial_stratified_kfoldcv_keepEverything_partitionedFiles', False, 5, True, False),
        ('trial_nonstratified_kfoldcv_keepEverything_partitionedFiles', False, 5, False, False),
        ('trial_stratified_kfoldcv_keepIDs_partitionedFiles', False, 5, True, True),
        ('trial_nonstratified_kfoldcv_keepIDs_partitionedFiles', False, 5, False, True),
    ]

    aTPF = {}
    for trial_name, use_mccv, no_folds, use_strat, ids_only in trial_settings:
        aTPF[trial_name] = crossValidate_svmlight_file(
            svmlight_file,
            output_dir=output_dir,
            mccv=use_mccv,
            folds=no_folds,
            perc_test=0.2,
            repetitions=2,
            stratified=use_strat,
            only_write_IDs=ids_only)
    return aTPF
def apply_crossValidate_svmlight_file_function_to_contrived_input_using_different_settings(self, svmlight_file):
    """Run crossValidate_svmlight_file on one input file under several settings.

    Every trial uses perc_test=0.2, repetitions=2 and stratified=False, and
    writes into the directory that contains this source file. The trials
    differ in mccv/folds (mccv=True with folds=1, or mccv=False with folds=5)
    and in only_write_IDs.

    Args:
        svmlight_file: passed through unchanged as the first positional
            argument of crossValidate_svmlight_file.

    Returns:
        dict mapping each trial name to the value returned by
        crossValidate_svmlight_file for that trial.
    """
    #####################################################################################
    #N.B. When all regression y-values made different, the use of stratified=True led to error (scikit-learn version 0.13)!
    #=> "ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of labels for any class cannot be less than 2."
    #Hence the four stratified counterparts of the trials below (same names,
    #with 'stratified' in place of 'nonstratified') are deliberately not run here.
    ######################################################################################

    # Directory containing this file. os.path.dirname replaces the former
    # split-on-backslash/join, which returned '' whenever the absolute path
    # contained no backslashes. Computed once: it is the same for every trial.
    output_dir = os.path.dirname(os.path.abspath(__file__))

    # (trial name, mccv, folds, only_write_IDs) - listed in execution order.
    trial_settings = [
        ('trial_nonstratified_mccv_keepEverything_partitionedFiles', True, 1, False),
        ('trial_nonstratified_mccv_keepIDs_partitionedFiles', True, 1, True),
        ('trial_nonstratified_kfoldcv_keepEverything_partitionedFiles', False, 5, False),
        ('trial_nonstratified_kfoldcv_keepIDs_partitionedFiles', False, 5, True),
    ]

    aTPF = {}
    for trial_name, use_mccv, no_folds, ids_only in trial_settings:
        aTPF[trial_name] = crossValidate_svmlight_file(
            svmlight_file,
            output_dir=output_dir,
            mccv=use_mccv,
            folds=no_folds,
            perc_test=0.2,
            repetitions=2,
            stratified=False,
            only_write_IDs=ids_only)
    return aTPF
def apply_crossValidate_svmlight_file_function_to_contrived_input_using_different_settings(self, svmlight_file):
    """Run crossValidate_svmlight_file on one input file under eight settings.

    Every trial uses perc_test=0.2 and repetitions=2 and writes into the
    directory that contains this source file. The trials cover every
    combination of mccv/folds (mccv=True with folds=1, or mccv=False with
    folds=5), stratified (True/False) and only_write_IDs (False/True).

    Args:
        svmlight_file: passed through unchanged as the first positional
            argument of crossValidate_svmlight_file.

    Returns:
        dict mapping each trial name to the value returned by
        crossValidate_svmlight_file for that trial.
    """
    #<DONE>: d.i.a.f.ok.r (including w.r.t. argument specification and reporting sections at start of def crossValidate_svmlight_file(...):)

    # Directory containing this file. os.path.dirname replaces the former
    # split-on-backslash/join, which returned '' whenever the absolute path
    # contained no backslashes. Computed once: it is the same for every trial.
    output_dir = os.path.dirname(os.path.abspath(__file__))

    # (trial name, mccv, folds, stratified, only_write_IDs) - listed in
    # execution order.
    trial_settings = [
        ('trial_stratified_mccv_keepEverything_partitionedFiles', True, 1, True, False),
        ('trial_nonstratified_mccv_keepEverything_partitionedFiles', True, 1, False, False),
        ('trial_stratified_mccv_keepIDs_partitionedFiles', True, 1, True, True),
        ('trial_nonstratified_mccv_keepIDs_partitionedFiles', True, 1, False, True),
        ('trial_stratified_kfoldcv_keepEverything_partitionedFiles', False, 5, True, False),
        ('trial_nonstratified_kfoldcv_keepEverything_partitionedFiles', False, 5, False, False),
        ('trial_stratified_kfoldcv_keepIDs_partitionedFiles', False, 5, True, True),
        ('trial_nonstratified_kfoldcv_keepIDs_partitionedFiles', False, 5, False, True),
    ]

    aTPF = {}
    for trial_name, use_mccv, no_folds, use_strat, ids_only in trial_settings:
        aTPF[trial_name] = crossValidate_svmlight_file(
            svmlight_file,
            output_dir=output_dir,
            mccv=use_mccv,
            folds=no_folds,
            perc_test=0.2,
            repetitions=2,
            stratified=use_strat,
            only_write_IDs=ids_only)
    return aTPF