# Score a previously saved model and write its class probabilities to CSV.
#
# NOTE(review): this section was recovered from whitespace-collapsed source.
# The statement nesting -- in particular which statements sit under the
# doTestFlag branch -- is reconstructed and should be confirmed.
testPath = _basePath + "001_test_tobe.csv"

# 1. read data
#    The training frames are always loaded; when doTestFlag is set, newX is
#    replaced by the test frame so the prediction runs on the test set.
dr = DataReader()
dr.readInCSV(path, "train")
newX, newY = dr._trainDataFrame, dr._ansDataFrame
if doTestFlag:
    dr.readInCSV(testPath, "test")
    newX = dr._testDataFrame
    # newX = pd.DataFrame(newX[newX.columns[0]])
    # Single-argument parenthesised form: prints the same text under the
    # Python 2 print statement used by this file.
    print(newX)

# 2. stratify 60 % data and train location only
# newX, newY = stratifyData(dr._trainDataFrame, dr._ansDataFrame, 0.4)

# 3. get all best model from newX
# fab = ModelFactory()
# fab._gridSearchFlag = True
# fab._n_iter_search = 500
# fab._expInfo = "001_location_only"
# fab.getAllModels(newX, newY)

# 4. test all data, output 3 ans as features
modelPath = _basePath + "(Xgboost)_(2016-02-03_18_39_14).model"
tmpOutPath = _basePath + "001_submission_2.csv"
tmpClf = loadModel(modelPath)

# Run the prediction once and reuse it for both the log output and the CSV,
# instead of evaluating the model twice on the same input.
predictedProba = tmpClf.predict_proba(newX)
log(predictedProba)
# outDf = pd.concat([newX, pd.DataFrame(tmpClf.predict_proba(newX))], axis=1)
outDf = pd.DataFrame(predictedProba)
outDf.to_csv(tmpOutPath, sep=',', encoding='utf-8')

musicAlarm()
tmpId = df[df.columns[0]][i2] tmpVal = df[df.columns[1]][i2] # tmpVal2= df[df.columns[2]][i2] if tmpMainId == tmpId: tmpFlag = True print tmpVal processDf[processDf.columns[tmpVal + 394]][i1] = 1 if tmpFlag == True and tmpMainId != tmpId: tmpLastI2 = i2 break print i1, i2 # outDf = pd.concat([dr._ansDataFrame, processDf], axis=1) outDf = processDf outDf.to_csv(_outputPathName, sep=',', encoding='utf-8') # print dr._ansDataFrame if __name__ == '__main__': start = time.time() oneHot() elapsed = time.time() - start print "elapsed:", elapsed , "sec" musicAlarm()