def runRFTest(fileParam, itnParam, cv_param):

    ### init
    mae_list_1 = []
    mae_list_2 = []
    time_list_1 = []
    time_list_2 = []
    ###
    testAndTrainData = getData(fileParam)
    trainingData = testAndTrainData[0]
    testData = testAndTrainData[1]

    for cnt in xrange(itnParam):
        t1 = time.time()
        theModel_1 = RandomForestClassifier()
        mae_for_param_combo_1 = perform_cross_validation(
            theModel_1, trainingData, testData, cv_param)[1]
        t2 = time.time()
        time_for_param_comb_1 = t2 - t1
        mae_list_1.append(mae_for_param_combo_1)
        time_list_1.append(time_for_param_comb_1)

        t2, t1 = 0, 0

        #n_estimators=10, criterion=entropy, max_features=sqrt, max_dept=15, max_leaf_nodes=75
        #bootstrap=False, min-sample-split=1, oob_score=False, min-wt-frac=0.3, warm-start=False
        t1 = time.time()
        theModel_2 = RandomForestClassifier(n_estimators=10,
                                            criterion='entropy',
                                            max_features='sqrt',
                                            max_depth=15,
                                            max_leaf_nodes=75,
                                            bootstrap=False,
                                            min_samples_split=1,
                                            oob_score=False,
                                            min_weight_fraction_leaf=0.3,
                                            n_jobs=-1,
                                            warm_start=False)

        mae_for_param_combo_2 = perform_cross_validation(
            theModel_2, trainingData, testData, cv_param)[1]
        t2 = time.time()
        time_for_param_comb_2 = t2 - t1
        mae_list_2.append(mae_for_param_combo_2)
        time_list_2.append(time_for_param_comb_2)

    mae_a12_ = a12_utility.doSlowA12(mae_list_1, mae_list_2)
    time_a12_ = a12_utility.doSlowA12(time_list_2, time_list_1)
    print "MAE  comaprison: is default worse than 'best combo' ?", mae_a12_
    print "time comaprison: is 'best' combo slower than default ?", time_a12_
def runCARTTest(fileParam, itnParam, cv_param):

    ### init
    mae_list_1 = []
    mae_list_2 = []
    time_list_1 = []
    time_list_2 = []
    ###
    testAndTrainData = getData(fileParam)
    trainingData = testAndTrainData[0]
    testData = testAndTrainData[1]

    for cnt in xrange(itnParam):
        t1 = time.time()
        the_Model_1 = DecisionTreeClassifier()
        mae_for_param_combo_1 = perform_cross_validation(
            the_Model_1, trainingData, testData, cv_param)[1]
        t2 = time.time()
        time_for_param_comb_1 = t2 - t1
        mae_list_1.append(mae_for_param_combo_1)
        time_list_1.append(time_for_param_comb_1)

        t2, t1 = 0, 0
        #criterion=entropy splitter=random max_features=10 max_depth=25 min_samples_split=4 min_samples_leaf=2
        #max_leaf_nodes=10000

        t1 = time.time()
        the_Model_2 = DecisionTreeClassifier(criterion='entropy',
                                             splitter='random',
                                             max_features=10,
                                             max_depth=25,
                                             min_samples_split=4,
                                             min_samples_leaf=2,
                                             max_leaf_nodes=10000)

        mae_for_param_combo_2 = perform_cross_validation(
            the_Model_2, trainingData, testData, cv_param)[1]
        t2 = time.time()
        time_for_param_comb_2 = t2 - t1
        mae_list_2.append(mae_for_param_combo_2)
        time_list_2.append(time_for_param_comb_2)

    mae_a12_ = a12_utility.doSlowA12(mae_list_1, mae_list_2)
    time_a12_ = a12_utility.doSlowA12(time_list_2, time_list_1)
    print "MAE  comaprison: is default worse than 'best combo' ?", mae_a12_
    print "time comaprison: is 'best' combo slower than default ?", time_a12_
def stat_a12_test_(valueListParam): 
  for value_for_one_classifier in valueListParam: 
    comparer = value_for_one_classifier  
    comparees = [x for x in valueListParam if x!=value_for_one_classifier] 
    print "---"
    for comparee_item in comparees: 
      #print "comparer: {}, comapree: {}".format(comparer, comparee_item)  
      a12_results = a12_utility.doSlowA12(comparer, comparee_item)
      print "----->", a12_results   
def runRFTest(fileParam, itnParam, cv_param):
 
  ### init 
  mae_list_1 = [] 
  mae_list_2 = []   
  time_list_1 = [] 
  time_list_2 = []     
  ### 
  testAndTrainData  = getData(fileParam)  
  trainingData = testAndTrainData[0]
  testData = testAndTrainData[1]
   
  for cnt in xrange(itnParam):
    t1 = time.time()  
    theModel_1 = RandomForestClassifier()  
    mae_for_param_combo_1 =  perform_cross_validation(theModel_1, trainingData, testData, cv_param)[1]
    t2 = time.time()    
    time_for_param_comb_1 =  t2 - t1 
    mae_list_1.append(mae_for_param_combo_1) 
    time_list_1.append(time_for_param_comb_1)
    
    t2, t1 = 0, 0 

    #n_estimators=10, criterion=entropy, max_features=sqrt, max_dept=15, max_leaf_nodes=75
    #bootstrap=False, min-sample-split=1, oob_score=False, min-wt-frac=0.3, warm-start=False
    t1 = time.time()         
    theModel_2     =  RandomForestClassifier(n_estimators=10, criterion='entropy', max_features='sqrt',  
                                                            max_depth=15, max_leaf_nodes=75, 
                                                            bootstrap=False, min_samples_split=1,  
                                                            oob_score=False, min_weight_fraction_leaf=0.3,  
                                                            n_jobs=-1 , warm_start=False)    

    mae_for_param_combo_2 =  perform_cross_validation(theModel_2, trainingData, testData, cv_param)[1]
    t2 = time.time()  
    time_for_param_comb_2 =  t2 - t1
    mae_list_2.append(mae_for_param_combo_2) 
    time_list_2.append(time_for_param_comb_2)    
    
    
    
  mae_a12_ = a12_utility.doSlowA12(mae_list_1, mae_list_2)    
  time_a12_ = a12_utility.doSlowA12(time_list_2, time_list_1)   
  print "MAE  comaprison: is default worse than 'best combo' ?", mae_a12_  
  print "time comaprison: is 'best' combo slower than default ?", time_a12_      
def runsvmTest(fileParam, itnParam, cv_param):

    ### init
    mae_list_1 = []
    mae_list_2 = []
    time_list_1 = []
    time_list_2 = []
    ###
    testAndTrainData = getData(fileParam)
    trainingData = testAndTrainData[0]
    testData = testAndTrainData[1]

    for cnt in xrange(itnParam):
        t1 = time.time()
        the_Model_1 = svm.SVC(kernel='rbf')
        mae_for_param_combo_1 = perform_cross_validation(
            the_Model_1, trainingData, testData, cv_param)[1]
        t2 = time.time()
        time_for_param_comb_1 = t2 - t1
        mae_list_1.append(mae_for_param_combo_1)
        time_list_1.append(time_for_param_comb_1)

        t2, t1 = 0, 0

        # C=10 shrinking=False tol=1e-05 decision_function_shape=ovr
        t1 = time.time()
        the_Model_2 = svm.SVC(kernel='rbf',
                              C=10,
                              shrinking=False,
                              tol=1e-05,
                              decision_function_shape='ovr')

        mae_for_param_combo_2 = perform_cross_validation(
            the_Model_2, trainingData, testData, cv_param)[1]
        t2 = time.time()
        time_for_param_comb_2 = t2 - t1
        mae_list_2.append(mae_for_param_combo_2)
        time_list_2.append(time_for_param_comb_2)

    mae_a12_ = a12_utility.doSlowA12(mae_list_1, mae_list_2)
    time_a12_ = a12_utility.doSlowA12(time_list_2, time_list_1)
    print "MAE  comaprison: is default worse than 'best combo' ?", mae_a12_
    print "time comaprison: is 'best' combo slower than default ?", time_a12_
def runknnTest(fileParam, itnParam, cv_param):

    ### init
    mae_list_1 = []
    mae_list_2 = []
    time_list_1 = []
    time_list_2 = []
    ###
    testAndTrainData = getData(fileParam)
    trainingData = testAndTrainData[0]
    testData = testAndTrainData[1]

    for cnt in xrange(itnParam):
        t1 = time.time()
        the_Model_1 = KNeighborsClassifier()
        mae_for_param_combo_1 = perform_cross_validation(
            the_Model_1, trainingData, testData, cv_param)[1]
        t2 = time.time()
        time_for_param_comb_1 = t2 - t1
        mae_list_1.append(mae_for_param_combo_1)
        time_list_1.append(time_for_param_comb_1)

        t2, t1 = 0, 0

        t1 = time.time()
        # n_neighbors=10 weights=distance metric=minkowski p=3 algorithm=brute
        the_Model_2 = KNeighborsClassifier(n_neighbors=10,
                                           weights='distance',
                                           metric='minkowski',
                                           p=3,
                                           algorithm='brute')

        mae_for_param_combo_2 = perform_cross_validation(
            the_Model_2, trainingData, testData, cv_param)[1]
        t2 = time.time()
        time_for_param_comb_2 = t2 - t1
        mae_list_2.append(mae_for_param_combo_2)
        time_list_2.append(time_for_param_comb_2)

    mae_a12_ = a12_utility.doSlowA12(mae_list_1, mae_list_2)
    time_a12_ = a12_utility.doSlowA12(time_list_2, time_list_1)
    print "MAE  comaprison: is default worse than 'best combo' ?", mae_a12_
    print "time comaprison: is 'best' combo slower than default ?", time_a12_
# Example #7
# 0
def runRFTest(orig_fileParam, synth_fileParam, itnParam, cv_param):
 
  ### init 
  mae_list_1 = [] 
  mae_list_2 = []   
  time_list_1 = [] 
  time_list_2 = []     
  ### 
  orig_testAndTrainData  = getData(orig_fileParam)  
  orig_trainingData = orig_testAndTrainData[0]
  orig_testData = orig_testAndTrainData[1]
  ### 
  synth_testAndTrainData  = getData(synth_fileParam)  
  synth_trainingData = synth_testAndTrainData[0]
  synth_testData = synth_testAndTrainData[1]  
  
   
  for cnt in xrange(itnParam):
    t1 = time.time()  
    theModel_orig = RandomForestClassifier(n_estimators=50)  
    mae_for_orig =  perform_cross_validation(theModel_orig, orig_trainingData, orig_testData, cv_param)[1]
    t2 = time.time()    
    time_for_orig =  t2 - t1 
    mae_list_1.append(mae_for_orig) 
    time_list_1.append(time_for_orig)
    
    t2, t1 = 0, 0 


    t1 = time.time()         
    theModel_synth =  RandomForestClassifier(n_estimators=50)    
    mae_for_synth =  perform_cross_validation(theModel_synth, synth_trainingData, synth_testData, cv_param)[1]
    t2 = time.time()  
    time_for_synth =  t2 - t1
    mae_list_2.append(mae_for_synth) 
    time_list_2.append(time_for_synth)    
    
    
    
  mae_a12_ = a12_utility.doSlowA12(mae_list_1, mae_list_2)    
  time_a12_ = a12_utility.doSlowA12(time_list_2, time_list_1)   
  print "MAE  comparison: is original worse than synthetic ?", mae_a12_  
  print "time comparison: is synthetic slower than original ?", time_a12_      
# Example #8
# 0
def stat_a12_test_(valueListParam):
    for value_for_one_classifier in valueListParam:
        comparer = value_for_one_classifier
        comparees = [
            x for x in valueListParam if x != value_for_one_classifier
        ]
        print "---"
        for comparee_item in comparees:
            #print "comparer: {}, comapree: {}".format(comparer, comparee_item)
            a12_results = a12_utility.doSlowA12(comparer, comparee_item)
            print "----->", a12_results
def runCARTTest(fileParam, itnParam, cv_param):
 
  ### init 
  mae_list_1 = [] 
  mae_list_2 = []   
  time_list_1 = [] 
  time_list_2 = []     
  ### 
  testAndTrainData  = getData(fileParam)  
  trainingData = testAndTrainData[0]
  testData = testAndTrainData[1]
   
  for cnt in xrange(itnParam):
    t1 = time.time()  
    the_Model_1 = DecisionTreeClassifier()
    mae_for_param_combo_1 =  perform_cross_validation(the_Model_1, trainingData, testData, cv_param)[1]
    t2 = time.time()    
    time_for_param_comb_1 =  t2 - t1 
    mae_list_1.append(mae_for_param_combo_1) 
    time_list_1.append(time_for_param_comb_1)
    
    t2, t1 = 0, 0 
    #criterion=entropy splitter=random max_features=10 max_depth=25 min_samples_split=4 min_samples_leaf=2 
    #max_leaf_nodes=10000

    t1 = time.time()         
    the_Model_2=DecisionTreeClassifier(criterion='entropy',splitter='random',max_features=10,max_depth=25,min_samples_split=4,min_samples_leaf=2,max_leaf_nodes=10000)  

    mae_for_param_combo_2 =  perform_cross_validation(the_Model_2, trainingData, testData, cv_param)[1]
    t2 = time.time()  
    time_for_param_comb_2 =  t2 - t1
    mae_list_2.append(mae_for_param_combo_2) 
    time_list_2.append(time_for_param_comb_2)    
    
    
    
  mae_a12_ = a12_utility.doSlowA12(mae_list_1, mae_list_2)    
  time_a12_ = a12_utility.doSlowA12(time_list_2, time_list_1)   
  print "MAE  comaprison: is default worse than 'best combo' ?", mae_a12_  
  print "time comaprison: is 'best' combo slower than default ?", time_a12_      
def runsvmTest(fileParam, itnParam, cv_param):
 
  ### init 
  mae_list_1 = [] 
  mae_list_2 = []   
  time_list_1 = [] 
  time_list_2 = []     
  ### 
  testAndTrainData  = getData(fileParam)  
  trainingData = testAndTrainData[0]
  testData = testAndTrainData[1]
   
  for cnt in xrange(itnParam):
    t1 = time.time()  
    the_Model_1 = svm.SVC(kernel='rbf')
    mae_for_param_combo_1 =  perform_cross_validation(the_Model_1, trainingData, testData, cv_param)[1]
    t2 = time.time()    
    time_for_param_comb_1 =  t2 - t1 
    mae_list_1.append(mae_for_param_combo_1) 
    time_list_1.append(time_for_param_comb_1)
    
    t2, t1 = 0, 0 

    # C=10 shrinking=False tol=1e-05 decision_function_shape=ovr
    t1 = time.time()         
    the_Model_2  =  svm.SVC(kernel='rbf', C=10, shrinking = False, tol =1e-05, decision_function_shape = 'ovr') 

    mae_for_param_combo_2 =  perform_cross_validation(the_Model_2, trainingData, testData, cv_param)[1]
    t2 = time.time()  
    time_for_param_comb_2 =  t2 - t1
    mae_list_2.append(mae_for_param_combo_2) 
    time_list_2.append(time_for_param_comb_2)    
    
    
    
  mae_a12_ = a12_utility.doSlowA12(mae_list_1, mae_list_2)    
  time_a12_ = a12_utility.doSlowA12(time_list_2, time_list_1)   
  print "MAE  comaprison: is default worse than 'best combo' ?", mae_a12_  
  print "time comaprison: is 'best' combo slower than default ?", time_a12_      
def runknnTest(fileParam, itnParam, cv_param):
 
  ### init 
  mae_list_1 = [] 
  mae_list_2 = []   
  time_list_1 = [] 
  time_list_2 = []     
  ### 
  testAndTrainData  = getData(fileParam)  
  trainingData = testAndTrainData[0]
  testData = testAndTrainData[1]
   
  for cnt in xrange(itnParam):
    t1 = time.time()  
    the_Model_1 = KNeighborsClassifier()
    mae_for_param_combo_1 =  perform_cross_validation(the_Model_1, trainingData, testData, cv_param)[1]
    t2 = time.time()    
    time_for_param_comb_1 =  t2 - t1 
    mae_list_1.append(mae_for_param_combo_1) 
    time_list_1.append(time_for_param_comb_1)
    
    t2, t1 = 0, 0 

    t1 = time.time()         
    # n_neighbors=10 weights=distance metric=minkowski p=3 algorithm=brute
    the_Model_2 = KNeighborsClassifier(n_neighbors=10, weights='distance', metric='minkowski', p=3, algorithm='brute')   

    mae_for_param_combo_2 =  perform_cross_validation(the_Model_2, trainingData, testData, cv_param)[1]
    t2 = time.time()  
    time_for_param_comb_2 =  t2 - t1
    mae_list_2.append(mae_for_param_combo_2) 
    time_list_2.append(time_for_param_comb_2)    
    
    
    
  mae_a12_ = a12_utility.doSlowA12(mae_list_1, mae_list_2)    
  time_a12_ = a12_utility.doSlowA12(time_list_2, time_list_1)   
  print "MAE  comaprison: is default worse than 'best combo' ?", mae_a12_  
  print "time comaprison: is 'best' combo slower than default ?", time_a12_