# Score a data set with one saved model and write the predict_proba output
# to a CSV file. The training frame is used by default; when doTestFlag is
# set, the test CSV is read instead and its frame replaces newX.
testPath = _basePath + "001_test_tobe.csv"

# 1. read data
dr = DataReader()
dr.readInCSV(path, "train")
newX, newY = dr._trainDataFrame, dr._ansDataFrame
if doTestFlag:
    dr.readInCSV(testPath, "test")
    newX = dr._testDataFrame
# newX = pd.DataFrame(newX[newX.columns[0]])
print(newX)

# 2. stratify 60 % data and train location only
# newX, newY = stratifyData(dr._trainDataFrame, dr._ansDataFrame, 0.4)

# 3. get all best model from newX
# fab = ModelFactory()
# fab._gridSearchFlag = True
# fab._n_iter_search = 500
# fab._expInfo = "001_location_only"
# fab.getAllModels(newX, newY)

# 4. test all data, output 3 ans as features
modelPath = _basePath + "(Xgboost)_(2016-02-03_18_39_14).model"
tmpOutPath = _basePath + "001_submission_2.csv"
tmpClf = loadModel(modelPath)

# Predict once and reuse the result for both the log and the CSV output.
proba = tmpClf.predict_proba(newX)
log(proba)
# outDf = pd.concat([newX, pd.DataFrame(proba)], axis=1)
outDf = pd.DataFrame(proba)
outDf.to_csv(tmpOutPath, sep=',', encoding='utf-8')
musicAlarm()
# 4. test all data, output 3 ans as features
#
# For every classifier name below, load the matching saved model from the
# "stacked" model folder, run predict_proba on the training frame, and write
# the result to its own "<expNo>_blender<name>_train.csv" file.
#
# Model paths recorded in the original notes (not referenced by the code below):
# D:\Kaggle\Telstra\004_one_hot_resource_type\(Xgboost)_(2016-02-06_11_14_31).model
# D:\Kaggle\Telstra\004_one_hot_resource_type\(Random_Forest)_(2016-02-06_11_24_09).model
# D:\Kaggle\Telstra\004_one_hot_resource_type\(Extra_Trees)_(2016-02-06_11_30_52).model
# D:\Kaggle\Telstra\004_one_hot_resource_type\(K_NN)_(2016-02-06_11_40_10).model
modelFolder = _basePath + "models" + Config.osSep + "stacked" + Config.osSep

clfNameList = [
    "Extra_Trees",
    "K_NN",
    "Random_Forest",
    "Xgboost",
    "Logistic_Regression",
]

testCsv = _basePath + "010_train_tobe.csv"
dr = DataReader()
newX, testY = dr.cvtPathListToDfList(testCsv, "train")

for curModel in clfNameList:
    modelPath = modelFolder + str(getMatchNameModelPath(modelFolder, curModel))
    tmpOutPath = _basePath + expNo + "_blender" + curModel + "_train.csv"
    tmpClf = loadModel(modelPath)

    # Predict once per model and reuse the result for the log and the CSV.
    proba = tmpClf.predict_proba(newX)
    log(proba)
    # outDf = pd.concat([newX, pd.DataFrame(proba)], axis=1)
    outDf = pd.DataFrame(proba)
    outDf.to_csv(tmpOutPath, sep=',', encoding='utf-8')
# musicAlarm()