# Script fragment: load the hourly training CSV, fit a decision tree and dump
# it as a Graphviz .dot file, then (below an exit() call) time the training
# of a random forest and start preparing the "apply" data set.
# NOTE(review): loadData, trainDecisionTree, trainRandomForest, tree and time
# are not defined or imported in the visible lines - presumably they come
# from imports above this fragment; confirm.

# open train data

dataFile1 = "/media/sf_lur/data/data_hour.csv"
# data1 / columns1 are handed to loadData empty; presumably it fills them in
# place (column name -> values, and the list of column names) - TODO confirm.
data1 = {}
columns1 = []
# NOTE(review): the ["location", "timestamp"] argument looks like a list of
# columns to leave out of the loaded data - verify against loadData.
loadData(dataFile1, ["location", "timestamp"], data1, columns1)

# Fit a tree on "target" purely for visualisation; {'depth': 4} is passed
# through to the helper (presumably the maximum tree depth - confirm).
modelViz = trainDecisionTree(data1, columns1, "target", {'depth': 4})
print(str(modelViz.modelColumns))
# Export the fitted tree to a .dot file, labelling nodes with the model's
# column names. `tree` is presumably sklearn.tree - confirm.
tree.export_graphviz(modelViz.model, out_file='/media/sf_lur/data/dtr.dot', feature_names=modelViz.modelColumns, max_depth=8)     

# NOTE(review): assuming this is the interpreter's built-in exit(), the script
# stops here and nothing below this line runs. Looks like a temporary
# debugging stop - confirm before relying on the random-forest section.
exit()

# Time the random-forest training; the elapsed value printed below is
# end - start (seconds, if `time` is the standard-library module).
start = time.time()
model = trainRandomForest(data1, columns1, "target", {'estimators': 59, 'leaf': 9})

# Disabled code: prints a feature ranking from the forest's importances.
# importances = model.model.feature_importances_
# std = np.std([tree.feature_importances_ for tree in model.model.estimators_],
#              axis=0)
# indices = np.argsort(importances)[::-1]
# print("Feature ranking:")
# for f in range(0,len(model.modelColumns)):
#     print(model.modelColumns[indices[f]] + " -> " + str(importances[indices[f]]))

end = time.time()
print(end - start)

# load apply data
dataFile2 = "/media/sf_lur/data/grid_hour.csv"
# Empty container for the apply data; the code that fills it is not visible
# in this fragment.
data2 = {}
# Example #2
# 0
# Script fragment: for each value returned by findOutKForValidation, split the
# data on "location", train a random forest on the training part, predict the
# test part, evaluate it with raeEval, and start writing a CSV of errors.
# NOTE(review): data, stationNames, OUTPUT_DIRECTORY, deepcopy and all helper
# functions are defined outside the visible lines - confirm they are in scope.
values = findOutKForValidation("location", data)

# NOTE(review): no close() for this handle appears in the visible lines;
# confirm it is closed further down, or consider a `with` block.
output = open(OUTPUT_DIRECTORY + "errors_rae.csv", 'w')
# Stays empty until the first loop iteration; used below as the
# "header not written yet" flag.
outputColumns = []

for v in values:
    # stationNames is keyed by the string form of the location value.
    sName = stationNames[str(v)]
    print("location: " + str(v) + " -> " + sName)
    # Presumably testData holds the rows for location v and trainData the
    # rest - verify against splitDataForXValidation.
    trainData, testData = splitDataForXValidation(v, "location", data)
    # Every key of trainData except "target" is used as a model input
    # (assumes trainData is dict-like, keyed by column name - TODO confirm).
    trainColumns = []
    for c in trainData:
        if c != "target":
            trainColumns.append(c)

    model = trainRandomForest(trainData, trainColumns, "target", {
        'estimators': 59,
        'leaf': 9
    })
    # NOTE(review): the meaning of the third argument (0) is not visible
    # here - check applyRandomForest.
    predictionData = applyRandomForest(testData, model, 0)
    rae = raeEval(testData["target"], predictionData)
    # Diagnostic output: lengths of rae[1], the predictions and the test
    # targets (presumably expected to match - confirm).
    print(str(len(rae[1])))
    print(str(len(predictionData)))
    print(str(len(testData["target"])))

    # write header
    # Runs only while outputColumns is still empty, i.e. on the first
    # iteration that yields at least one train column. The header is the
    # train column names, comma-separated, followed by "error_rae".
    if len(outputColumns) == 0:
        outputColumns = deepcopy(trainColumns)
        for c in outputColumns:
            output.write(c)
            output.write(",")
        output.write("error_rae\n")