# Random-forest experiment: load the data set, cross-validate, fit, evaluate,
# and draw a bar chart of the fitted model's feature importances.
# get_all_data, detect_with_cross_validation and detect are project helpers
# defined outside this section.
data_path = "data/"
X_train, X_test, Y_train, Y_test = get_all_data(data_path)

""" Define model """
# model = RandomForestClassifier()
# oob_score must be the boolean True. The previous value, the string 'TRUE',
# only enabled out-of-bag scoring because non-empty strings are truthy, and
# recent scikit-learn releases reject it during parameter validation.
model = RandomForestClassifier(n_estimators=800, oob_score=True)

""" Crossvalidation """
detect_with_cross_validation(model, X_train, Y_train)

""" Detecting """
# Fit on the training split before handing the model to detect().
model.fit(X_train, Y_train)
detect(model, X_test, Y_test)

# Disabled hyper-parameter search over n_estimators, kept for reference.
# param = {
#     'n_estimators': [500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400]
# }
# kflod = StratifiedKFold(n_splits=10)
# grid_search = GridSearchCV(estimator=model, param_grid=param, scoring='accuracy', cv=kflod)
# grid_result = grid_search.fit(X_train, Y_train)
# print("Best: %f using %s" % (grid_result.best_score_, grid_search.best_params_))
#
# means = grid_result.cv_results_['mean_test_score']
# params = grid_result.cv_results_['params']
# for mean, param in zip(means,params):
#     print("%f with: %r" % (mean, param))

# Read the importances once: the attribute is recomputed from the ensemble
# on every access, and the plot needs both its length and its values.
importances = model.feature_importances_
pyplot.bar(range(len(importances)), importances)
# Evaluation sequence for the classifier `clf` (created outside this section).
# detect_with_cross_validation and detect are project helpers defined
# elsewhere; presumably they report cross-validated and test-set metrics
# respectively -- TODO confirm against their definitions.
""" Crossvalidation """
detect_with_cross_validation(clf, X_train, y_train)

""" Learning """
# Fit on the training data before calling detect() on the test data.
clf.fit(X_train, y_train)

""" Detect """
detect(clf, X_test, y_test)

# Disabled experiments kept for reference (Python 2 style print statements).
# score = clf.score(X_test, y_test)
# print score
#
# # iterate over classifiers
# for clf in classifiers:
#     # ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
#     clf.fit(X_train, y_train)
#     score = clf.score(X_test, y_test)
#     # clf.fit(X, y)
#     # score = clf.score(X_test_, y_test_)
#     print score
# subsample=0.8, # colsample_bytree=0.8, # objective='binary:logistic', # nthread=4, # scale_pos_weight=1, # seed=27) """ Crossvalidation """ detect_with_cross_validation(model, np_X_train, np_y_train) """ Detect model """ model.fit(np_X_train, np_y_train) detect(model, np_X_test, np_y_test) # pyplot.bar(range(len(model.feature_importances_)), model.feature_importances_) # pyplot.show() # param = { # 'n_estimators': [1300, 1400, 1500, 1600, 1700, 1800], #[600, 700, 800, 900, 950, 1000, 1050, 1100, 1150, 1200], # 'learning_rate': [0.1], # 'max_depth': [10], # 'min_child_weight': [1], # 'gamma': [0], # 'subsample': [0.8], # 'colsample_bytree': [0.8], # 'objective': ['binary:logistic'], # 'nthread': [4], # 'scale_pos_weight': [1],