import ReadFiles
import pandas as pd
import utils
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

# Load the data set. readFiles() is called with no arguments and its result is
# unpacked into the six names below.
# NOTE(review): the names suggest full frames plus feature/label splits for
# train and test -- confirm against ReadFiles.
(trainSet, trainSetFeatures, trainSetLabels,
 testSet, testSetFeatures, testSetLabels) = ReadFiles.readFiles()

# Train the model.
# Alternative model configuration kept for reference:
# random_forest_regressor = RandomForestRegressor(n_estimators=15, max_depth=6, random_state=0)
# NOTE(review): warm_start is documented as reusing the previous fit when
# fit() is called again; fit() is called only once here, so the flag looks
# redundant -- confirm before removing.
clf = GradientBoostingRegressor(
    warm_start=True,
    n_estimators=45,
    max_depth=6,
    random_state=25,
)
clf.fit(trainSetFeatures, trainSetLabels)

# Predict on the test features.
predictedLabels = clf.predict(testSetFeatures)

# Write one row per test id with its predicted relevance.
# NOTE(review): the file name says "XGBoosting" although the model above is
# scikit-learn's GradientBoostingRegressor; the path is left unchanged in case
# other tooling reads it.
id_test = testSet['id']
results = pd.DataFrame({"id": id_test, "relevance": predictedLabels})
results.to_csv('IOFolder/XGBoosting_Results.csv', index=False)

# Report the error metrics computed by utils against the test labels.
# Each line is formatted into a single string so that print(...) parses and
# produces the same text on both Python 2 and Python 3.
print("RMSE :\t%s" % (utils.getRMSE(testSetLabels, predictedLabels),))
print("MAE :\t%s" % (utils.getMAE(testSetLabels, predictedLabels),))
import ReadFiles
import numpy as np
import pandas as pd
import utils

# Load the data set. readFiles() is called with no arguments and its result is
# unpacked into the six names below (only the test-side values are used in
# this section).
(trainSet, trainSetFeatures, trainSetLabels,
 testSet, testSetFeatures, testSetLabels) = ReadFiles.readFiles()

# Baseline: one utils.getRandFloat(1, 3) draw per test row, no model involved.
# NOTE(review): presumably getRandFloat(1, 3) returns a random float between
# 1 and 3 -- confirm the exact bounds in utils.
predictedLabels = np.array(
    [utils.getRandFloat(1, 3) for _ in range(testSet.shape[0])]
)

# Write one row per test id with its guessed relevance.
id_test = testSet['id']
results = pd.DataFrame({"id": id_test, "relevance": predictedLabels})
results.to_csv('IOFolder/random_guess_results.csv', index=False)

# Report the error metrics computed by utils against the test labels.
# Each line is formatted into a single string so that print(...) parses and
# produces the same text on both Python 2 and Python 3.
print("RMSE :\t%s" % (utils.getRMSE(testSetLabels, predictedLabels),))
print("MAE :\t%s" % (utils.getMAE(testSetLabels, predictedLabels),))