Пример #1
0
def main():
    """Train a hard-voting ensemble on the Titanic training data and save it.

    Reads the training frame via ``data_io``, cleans it with
    ``FeatureConverter``, cross-validates the ensemble, fits it on the full
    training set, and persists it with ``data_io.save_model``. Assumes
    column 0 of the cleaned array is the label and the rest are features.
    """
    # Base learners for the ensemble. The unused SVC that was constructed
    # here has been removed; max_depth must be an int, not 5.0.
    classifier1 = RandomForestClassifier(n_estimators=100, max_features=0.5,
                                         max_depth=5)
    classifier2 = DecisionTreeClassifier(max_depth=10, criterion='entropy',
                                         random_state=0)
    classifier3 = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
    classifier5 = LogisticRegression(penalty='l2', C=1.0, random_state=0)
    classifier6 = GaussianNB()

    print("Reading in the training data")
    train = data_io.get_train_df()

    print("Cleaning data. Check here for imputation, One hot encoding and factorization procedures..")
    train = FeatureConverter().clean_data(train)
    train.drop(['PassengerId'], axis=1, inplace=True)
    train = train.values

    eclf = EnsembleClassifier(
        clfs=[classifier1, classifier2, classifier3, classifier5, classifier6],
        voting='hard')
    # NOTE(review): 'roc_auc' normally requires probability scores or a
    # decision function; with hard voting this depends on what
    # EnsembleClassifier.predict exposes — verify against its implementation.
    scores = cross_val_score(estimator=eclf, X=train[0:, 1:], y=train[0:, 0],
                             cv=10, scoring='roc_auc')

    print("Accuracy: %0.4f (+/- %0.3f)" % (scores.mean(), scores.std()))
    eclf.fit(train[0:, 1:], train[0:, 0])

    print("Saving the classifier")
    data_io.save_model(eclf)
Пример #2
0
def main():
    """Train a KNN classifier on the forest-cover training data and save it.

    Reads the training frame via ``data_io``, cleans it with
    ``FeatureConverter``, fits the classifier on the full training set, and
    persists it with ``data_io.save_model``. Assumes the last column of the
    cleaned array is the label and the preceding columns are features.
    """
    # Only the KNN model is actually trained here; the other six candidate
    # classifiers that were constructed but never used have been removed,
    # along with the commented-out ensemble / CV / feature-importance code.
    eclf = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')

    print("Reading in the training data")
    train = data_io.get_train_df()

    print("Cleaning data. Check here for imputation, One hot encoding and factorization procedures..")
    train = FeatureConverter().clean_data(train)
    train.drop(['Id'], axis=1, inplace=True)
    train = train.values

    eclf.fit(train[0:, 0:-1], train[0:, -1])

    print("Saving the classifier")
    data_io.save_model(eclf)
Пример #3
0
def main():
    """Load the persisted model, score the cleaned test set, and write a submission file."""
    print("Loading the test data")
    model = data_io.load_model()

    print ("Load test data. And Clean..")
    frame = data_io.get_test_df()
    frame = FeatureConverter().clean_data(frame)
    # Keep the Id column for the submission before dropping it from features.
    record_ids = frame['Id']
    frame.drop(['Id'], axis=1, inplace=True)
    features = frame.values

    print("Making predictions")
    predicted = model.predict(features).astype(int)

    print("Writing predictions to file")
    data_io.write_submission(predicted, record_ids, ['Id', 'Cover_Type'])
Пример #4
0
def main():
    """Predict Cover_Type for the test set with the saved model and write the submission."""
    print("Loading the test data")
    clf = data_io.load_model()

    print("Load test data. And Clean..")
    df = data_io.get_test_df()
    df = FeatureConverter().clean_data(df)
    ids = df['Id']          # preserved for the submission's Id column
    df.drop(['Id'], axis=1, inplace=True)
    matrix = df.values

    print("Making predictions")
    labels = clf.predict(matrix).astype(int)

    print("Writing predictions to file")
    data_io.write_submission(labels, ids, ['Id', 'Cover_Type'])
Пример #5
0
# Hyper-parameter grid for the SVC search below.
svc_params_grid = {
    'C': [0.001, 0.1, 1.0, 10.0, 100.0],
    'kernel': ['linear', 'rbf'],
    'gamma': [0.001, 0.1, 1.0, 10.0, 100.0],
}

if __name__ == "__main__":

    print("Reading in the training data")
    train = data_io.get_train_df()

    print("Cleaning data. Check here for imputation, One hot encoding and factorization procedures..")
    train = FeatureConverter().clean_data(train)
    train.drop(['PassengerId'], axis=1, inplace=True)
    train = train.values

    # Grid-search each candidate model; column 0 of the array is the label.
    grid_search = GridSearchCV(RandomForestClassifier(n_estimators=100),
                               rf_params_grid, cv=5, verbose=1)
    grid_search.fit(train[0:, 1:], train[0:, 0])
    print(grid_search.best_params_)  # was a Python 2 print statement

    grid_search = GridSearchCV(LogisticRegression(random_state=0),
                               lr_params_grid, cv=5, verbose=1)
    grid_search.fit(train[0:, 1:], train[0:, 0])
    print(grid_search.best_params_)  # was a Python 2 print statement

    grid_search = GridSearchCV(SVC(random_state=0), svc_params_grid,
                               cv=5, verbose=1)
    grid_search.fit(train[0:, 1:], train[0:, 0])
    # Report the SVC result too, matching the two searches above.
    print(grid_search.best_params_)
Пример #6
0
# NOTE(review): this dict was a bare expression statement whose value was
# immediately discarded. By parallel with the earlier example it is the SVC
# search grid, so it is bound to a name here — confirm against the rest of
# the script (the assignment line appears to have been lost in extraction).
svc_params_grid = {
    'C': [0.001, 0.1, 1.0, 10.0, 100.0],
    'kernel': ['linear', 'rbf'],
    'gamma': [0.001, 0.1, 1.0, 10.0, 100.0],
}

if __name__ == "__main__":

    print("Reading in the training data")
    train = data_io.get_train_df()

    print(
        "Cleaning data. Check here for imputation, One hot encoding and factorization procedures.."
    )
    train = FeatureConverter().clean_data(train)
    train.drop(['PassengerId'], axis=1, inplace=True)
    train = train.values

    # Grid-search the random forest; column 0 of the array is the label.
    grid_search = GridSearchCV(RandomForestClassifier(n_estimators=100),
                               rf_params_grid,
                               cv=5,
                               verbose=1)
    grid_search.fit(train[0:, 1:], train[0:, 0])

    # Was a Python 2 print statement — a SyntaxError in Python 3 alongside
    # the print() calls used everywhere else in this file.
    print(grid_search.best_params_)

    grid_search = GridSearchCV(LogisticRegression(random_state=0),
                               lr_params_grid,
                               cv=5,
                               verbose=1)