def getData(fileNameParam):
    """Load a dataset and return its first two components.

    Delegates to ``IO_.giveTestAndTrainingData`` and hands back elements
    0 and 1 of whatever that call returns, in that order.

    :param fileNameParam: dataset location, forwarded untouched to ``IO_``.
    :returns: ``(trainData, testData)`` tuple.
    """
    loaded = IO_.giveTestAndTrainingData(fileNameParam)
    return loaded[0], loaded[1]
def experiemnt_random_forest(fileNameParam):
  import exp_x_classifiers , IO_ 
  testAndTrainData = IO_.giveTestAndTrainingData(fileNameParam)
  #print testAndTrainData
  print "This is 'experiemnt_random_forest' "  
  
  # settign up train data 
  trainData = testAndTrainData[0]
  #print trainData
  original_rows = trainData.shape[0]
  original_cols =  trainData.shape[1] 
  print "Size of  training data : rows: {}, columns: {}".format( original_rows , original_cols )
  
  # settign up test data 
  testData = testAndTrainData[1]   
  #print testData  
  for selCount in xrange(original_cols):
    count_ = selCount + 1 
    if count_ <= original_cols:      
      slected_training_data = giveSelectedTrainingData(trainData, testData, count_ ) 
      print "#################  No. of features to work with={}  ############".format(count_)
      print "Size of selected training data : ", slected_training_data.shape
      emperiemntSplitters=[float(x)/float(10) for x in xrange(10) if x > 0] 
      for elem in emperiemntSplitters:
	  #print "Training size: {} %".format(float(elem*100))
	  exp_x_classifiers.runRandomForest(slected_training_data, testData, elem)
Пример #3
0
def experiemnt_CART(fileNameParam):
    import exp_x_classifiers, IO_
    testAndTrainData = IO_.giveTestAndTrainingData(fileNameParam)
    print "This is 'experiemnt_CART' "

    # settign up train data
    trainData = testAndTrainData[0]
    original_rows = trainData.shape[0]
    original_cols = trainData.shape[1]
    print "Size of  training data : rows: {}, columns: {}".format(
        original_rows, original_cols)

    # settign up test data
    testData = testAndTrainData[1]
    #  for selCount in xrange(original_cols):
    #    count_ = selCount + 1
    #    if count_ < original_cols:
    slected_training_data = giveSelectedTrainingData(trainData, testData,
                                                     original_cols)
    print "#################  No. of features to work with={}  ############".format(
        original_cols)
    print "Size of selected training data : ", slected_training_data.shape
    emperiemntSplitters = [float(x) / float(10) for x in xrange(10) if x > 0]
    for elem in emperiemntSplitters:
        #print "Training size: {} %".format(float(elem*100))
        param_exp_classifier.runCART(slected_training_data, testData, elem)
def getData(fileNameParam):
    """Fetch the train/test pair for one dataset file.

    Thin wrapper over ``IO_.giveTestAndTrainingData``: the first element of
    its result is returned as the training part, the second as the test part.

    :param fileNameParam: dataset location, forwarded untouched to ``IO_``.
    :returns: ``(trainData, testData)`` tuple.
    """
    bundle = IO_.giveTestAndTrainingData(fileNameParam)
    train_part = bundle[0]
    test_part = bundle[1]
    return (train_part, test_part)
def experiemnt_gaussian_naive_bayes(fileNameParam):
    testAndTrainData = IO_.giveTestAndTrainingData(fileNameParam)
    print "This is 'experiemnt_gaussian_naive_bayes' "

    # settign up train data
    trainData = testAndTrainData[0]
    original_rows = trainData.shape[0]
    original_cols = trainData.shape[1]
    print "Size of  training data : rows: {}, columns: {}".format(
        original_rows, original_cols)

    # settign up test data
    testData = testAndTrainData[1]
    for selCount in xrange(original_cols):
        count_ = selCount + 1
        if count_ <= original_cols:
            slected_training_data = giveSelectedTrainingData(
                trainData, testData, count_)
            print "#################  No. of features to work with={}  ############".format(
                count_)
            print "Size of selected training data : ", slected_training_data.shape
            emperiemntSplitters = [
                float(x) / float(10) for x in xrange(10) if x > 0
            ]
            for elem in emperiemntSplitters:
                #print "Training size: {} %".format(float(elem*100))
                exp_x_classifiers.runGNB(slected_training_data, testData, elem)
def experiment_mobilesoft_knn(fileNameParam):
    testAndTrainData = IO_.giveTestAndTrainingData(fileNameParam)
    trainData = testAndTrainData[0]
    testData = testAndTrainData[1]
    #print trainData
    selected_training_data = getPCAedFeatures(trainData)
    print "Size of selected training data : ", np.shape(selected_training_data)
    print "=" * 50
    exp_x_classifiers.runKNN(selected_training_data, testData, 0.90)
    print "=" * 50
Пример #7
0
def mobilesoft_cart(fileNameParam, fileToWriteP):
    testAndTrainData = IO_.giveTestAndTrainingData(fileNameParam)
    trainData = testAndTrainData[0]
    testData = testAndTrainData[1]
    selected_training_data = pca_mobilesoft.getPCAedFeatures(trainData)
    print "Size of selected training data : ", np.shape(selected_training_data)
    print "=" * 50

    dict_of_results = param_exp_classifier.runCART(selected_training_data,
                                                   testData, 0.90)
    reportStr = param_exp_analysis.analyzeThis(dict_of_results)
    IO_.writeStrToFile(fileToWriteP, reportStr)
def speedup_random_forest(fileNameParam, fileToWriteP):

  testAndTrainData = IO_.giveTestAndTrainingData(fileNameParam)
  trainData = testAndTrainData[0]
  testData  = testAndTrainData[1]
  #print trainData
  selected_training_data = pca_mobilesoft.getPCAedFeatures(trainData)
  print "Size of selected training data : ", np.shape(selected_training_data)
  print "="*50

  dict_of_results = runRandomForest(selected_training_data, testData)
  reportStr = param_exp_analysis.analyzeThis(dict_of_results)
  IO_.writeStrToFile(fileToWriteP, reportStr)
def mobilesoft_cart(fileNameParam, fileToWriteP):
  indexVector = [0, 5, 10, 12, 13, 18, 19, 20]
  testAndTrainData = IO_.giveTestAndTrainingData(fileNameParam)
  trainData = testAndTrainData[0]
  testData  = testAndTrainData[1]
  selected_training_data = createMobileSoftFeatures(trainData, indexVector)
  print "Size of selected training data : ", np.shape(selected_training_data)
  print "="*50
  print "Glimpse at  selected features (10th entry): \n", selected_training_data.iloc[9, :]
  print "="*50
  print "Glimpse at  labels (10th entry): \n", testData.iloc[9]
  print "="*50
  dict_of_results = param_exp_classifier.runCART(selected_training_data, testData, 0.90)
  reportStr = param_exp_analysis.analyzeThis(dict_of_results)
  IO_.writeStrToFile(fileToWriteP, reportStr)
def experiment_mobilesoft_knn(fileNameParam, indexVector):
    testAndTrainData = IO_.giveTestAndTrainingData(fileNameParam)
    trainData = testAndTrainData[0]
    testData = testAndTrainData[1]
    #print trainData
    selected_training_data = createMobileSoftFeatures(trainData, indexVector)
    print "Size of selected training data : ", np.shape(selected_training_data)
    print "=" * 50
    print "Glimpse at  selected features (10th entry): \n", selected_training_data.iloc[
        9, :]
    print "=" * 50
    print "Glimpse at  labels (10th entry): \n", testData.iloc[9]
    print "=" * 50
    exp_x_classifiers.runKNN(selected_training_data, testData, 0.90)
    print "=" * 50
def experiemnt_random_forest(fileNameParam, fileToWriteP):
  
  testAndTrainData = IO_.giveTestAndTrainingData(fileNameParam)
  print "This is 'experiemnt_random_forest' "  
  
  # settign up train data 
  trainData = testAndTrainData[0]
  original_rows = trainData.shape[0]
  original_cols =  trainData.shape[1] 
  print "Size of  training data : rows: {}, columns: {}".format( original_rows , original_cols )
  
  # settign up test data 
  testData = testAndTrainData[1]   
  dict_of_results = param_exp_classifier.runRandomForest(trainData, testData)
  reportStr = param_exp_analysis.analyzeThis(dict_of_results)
  IO_.writeStrToFile(fileToWriteP, reportStr)
Пример #12
0
def experiemnt_random_forest(fileNameParam, fileToWriteP):

    testAndTrainData = IO_.giveTestAndTrainingData(fileNameParam)
    print "This is 'experiemnt_random_forest' "

    # settign up train data
    trainData = testAndTrainData[0]
    original_rows = trainData.shape[0]
    original_cols = trainData.shape[1]
    print "Size of  training data : rows: {}, columns: {}".format(
        original_rows, original_cols)

    # settign up test data
    testData = testAndTrainData[1]
    dict_of_results = param_exp_classifier.runRandomForest(trainData, testData)
    reportStr = param_exp_analysis.analyzeThis(dict_of_results)
    IO_.writeStrToFile(fileToWriteP, reportStr)
def runSVM(fileNamaParam, trainizingSizeParam):
    """Fit an RBF-kernel SVC on a train split and evaluate on the remainder.

    The dataset is loaded through ``IO_`` and split with
    ``cross_validation.train_test_split`` (``random_state=0``), holding out
    ``1.0 - trainizingSizeParam`` of the samples.  An ``svm.SVC`` with
    ``kernel='rbf'`` and ``C=1`` is fitted on the training split, and its
    predictions for the held-out split are passed to ``evalClassifier``
    together with the true values.  Nothing is returned.

    :param fileNamaParam: dataset location, forwarded untouched to ``IO_``.
    :param trainizingSizeParam: fraction of samples used for training.
    """
    # Portion of the data held out for evaluation.
    held_out_fraction = 1.0 - trainizingSizeParam

    loaded = IO_.giveTestAndTrainingData(fileNamaParam)
    # Element 0 is split as the feature matrix, element 1 as the targets.
    features, targets = loaded[0], loaded[1]

    split_parts = cross_validation.train_test_split(
        features, targets, test_size=held_out_fraction, random_state=0)
    fit_features, eval_features, fit_targets, eval_targets = split_parts

    fitted_model = svm.SVC(kernel='rbf', C=1).fit(fit_features, fit_targets)
    predictions = fitted_model.predict(eval_features)

    evalClassifier(eval_targets, predictions)
Пример #14
0
def runCART(fileNamaParam, trainizingSizeParam):
    """Fit a decision tree on a train split and evaluate on the remainder.

    The dataset is loaded through ``IO_`` and split with
    ``cross_validation.train_test_split`` (``random_state=0``), holding out
    ``1.0 - trainizingSizeParam`` of the samples.  A default
    ``DecisionTreeClassifier`` is fitted on the training split, and its
    predictions for the held-out split are passed to ``evalClassifier``
    together with the true values.  Nothing is returned.

    :param fileNamaParam: dataset location, forwarded untouched to ``IO_``.
    :param trainizingSizeParam: fraction of samples used for training.
    """
    # Portion of the data held out for evaluation.
    held_out_fraction = 1.0 - trainizingSizeParam

    loaded = IO_.giveTestAndTrainingData(fileNamaParam)
    # Element 0 is split as the feature matrix, element 1 as the targets.
    features, targets = loaded[0], loaded[1]

    split_parts = cross_validation.train_test_split(
        features, targets, test_size=held_out_fraction, random_state=0)
    fit_features, eval_features, fit_targets, eval_targets = split_parts

    tree_model = DecisionTreeClassifier()
    tree_model.fit(fit_features, fit_targets)
    predictions = tree_model.predict(eval_features)

    evalClassifier(eval_targets, predictions)