import time

from sklearn import tree

# loadData, trainDecisionTree and trainRandomForest are project helpers
# assumed to be imported from the project's own modules (not shown here).

# open train data
dataFile1 = "/media/sf_lur/data/data_hour.csv"
data1 = {}
columns1 = []
loadData(dataFile1, ["location", "timestamp"], data1, columns1)

# fit a shallow decision tree and export its structure for visualisation
modelViz = trainDecisionTree(data1, columns1, "target", {'depth': 4})
print(str(modelViz.modelColumns))
tree.export_graphviz(modelViz.model,
                     out_file='/media/sf_lur/data/dtr.dot',
                     feature_names=modelViz.modelColumns,
                     max_depth=8)
exit()  # stops the script here; the random forest timing below never runs

# time the training of a random forest on the same data
start = time.time()
model = trainRandomForest(data1, columns1, "target", {'estimators': 59, 'leaf': 9})
# (re-enabling the feature-ranking block below would also require numpy as np)
# importances = model.model.feature_importances_
# std = np.std([tree.feature_importances_ for tree in model.model.estimators_],
#              axis=0)
# indices = np.argsort(importances)[::-1]
# print("Feature ranking:")
# for f in range(0, len(model.modelColumns)):
#     print(model.modelColumns[indices[f]] + " -> " + str(importances[indices[f]]))
end = time.time()
print(end - start)

# load apply data
dataFile2 = "/media/sf_lur/data/grid_hour.csv"
data2 = {}
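# Side note (not part of the original pipeline): the decision-tree .dot file
# exported above can be rendered to an image, assuming the graphviz `dot`
# binary is available on the PATH; the PNG output path below is illustrative.
import subprocess
subprocess.check_call(["dot", "-Tpng", "/media/sf_lur/data/dtr.dot",
                       "-o", "/media/sf_lur/data/dtr.png"])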
from copy import deepcopy

# findOutKForValidation, splitDataForXValidation, trainRandomForest,
# applyRandomForest, raeEval, stationNames and OUTPUT_DIRECTORY are project
# helpers/constants assumed to be imported from the project's own modules.

# cross-validation held out by location, writing per-station RAE errors
values = findOutKForValidation("location", data)
output = open(OUTPUT_DIRECTORY + "errors_rae.csv", 'w')
outputColumns = []
for v in values:
    sName = stationNames[str(v)]
    print("location: " + str(v) + " -> " + sName)
    trainData, testData = splitDataForXValidation(v, "location", data)
    trainColumns = []
    for c in trainData:
        if c != "target":
            trainColumns.append(c)
    model = trainRandomForest(trainData, trainColumns, "target", {'estimators': 59, 'leaf': 9})
    predictionData = applyRandomForest(testData, model, 0)
    rae = raeEval(testData["target"], predictionData)
    print(str(len(rae[1])))
    print(str(len(predictionData)))
    print(str(len(testData["target"])))
    # write header (first iteration only)
    if len(outputColumns) == 0:
        outputColumns = deepcopy(trainColumns)
        for c in outputColumns:
            output.write(c)
            output.write(",")
        output.write("error_rae\n")
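# Sketch (assumption, not the project's actual implementation): raeEval is a
# project helper that is not shown in this excerpt. Based on how rae[1] is
# used above, it presumably returns an aggregate relative absolute error
# together with a per-sample error list; a minimal stand-in could look like:
def raeEvalSketch(observed, predicted):
    # per-sample absolute errors between observation and prediction
    absErrors = [abs(o - p) for o, p in zip(observed, predicted)]
    # normalise by the total absolute deviation from the observation mean
    meanObs = sum(observed) / float(len(observed))
    denominator = sum(abs(o - meanObs) for o in observed)
    return (sum(absErrors) / denominator, absErrors)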