Exemplo n.º 1
0
    # Fit the training data to the Survived labels and create the decision trees
    forest = forest.fit(train_x,train_y)
    #find training and cv error
    trainpred = forest.predict(train_x).astype(int)
    cvpred = forest.predict(cv_x).astype(int)
    terr = 1-np.sum(trainpred == train_y)/trainpred.shape[0]
    cverr = 1-np.sum(cvpred == cv_y)/cvpred.shape[0]
    
    # Take the same decision trees and run it on the test data
    output = forest.predict(test_x).astype(int)

    return terr,cverr,output


# Load data and separate it into training and cross-validation sets
# Load the pre-cleaned dataset via the project-local LoadData helper.
# NOTE(review): assumed meanings — data_x/data_y are training features/labels,
# test_x is the unlabeled test set, headings are feature column names, and
# submission is a template/id frame for the output file — confirm in LoadData.
data_x, data_y, test_x, headings, submission = LoadData.loadcleandata()
# Fraction of the labelled data to use for training; the remainder becomes
# the cross-validation split (see the commented train_test_split call below).
fraction = 0.66


###MAKE PREDICTIONS FOR SUBMISSION###############
##nummodels = 100
##predictions = np.zeros((test_x.shape[0],nummodels))
##for i in range(nummodels):
##    rseed = np.random.randint(1)
##    train_x,cv_x,train_y,cv_y = sklearn.cross_validation.train_test_split(data_x,data_y,train_size=int(fraction*data_x.shape[0]),random_state=rseed)
##    #select important features using randomized logreg
####    rlrtrain_x,rlrcv_x,rlrtest_x = randomlr(train_x,train_y,cv_x,test_x,regp=1,alpha=0.5)
####    terr,cverr,testpred = forestit(rlrtrain_x,train_y,rlrcv_x,cv_y,rlrtest_x,n_est=50)
##    #train and predict
##    terr,cverr,testpred = forestit(train_x,train_y,cv_x,cv_y,test_x,n_est=100)
##    predictions[:,i] = testpred