# Example #1
# 0
    #                     n_jobs = 3)
    # cv_rf.fit(training_set, class_set)
    # print('Best Parameters using grid search: \n',
    #	cv_rf.best_params_)
    # end = time.time()
    # print('Time taken in grid search: {0: .2f}'\
    #.format(end - start))

    # Test-set evaluation ---------------------------------------
    # Error rate is simply the complement of accuracy.
    test_error_rate_rf = 1 - accuracy_rf

    # Confusion matrix of true vs. predicted classes on the test set.
    test_crosstb = hf.create_conf_mat(test_class_set, predictions_rf)

    # Report feature importances via the helper module.
    hf.variable_importance(importances_rf, indices_rf)

    # Cross-validation metrics on the training data.
    print('Cross Validation:')
    hf.cross_val_metrics(fit_rf, training_set, class_set, print_results=True)

    print('Confusion Matrix:')
    print(test_crosstb, '\n')

    # f-strings reuse the exact original format specs (' 0.3f' / ' .3f'),
    # so the printed output is unchanged.
    print(f"Here is our mean accuracy on the test set:\n {accuracy_rf: 0.3f}")

    print(f"The test error rate for our model is:\n {test_error_rate_rf: .3f}")
# Example #2
# 0
# Test error rate is the complement of accuracy.
test_error_rate_RF = 1 - accuracy_RF

# ROC curve ------------------------------------------------------
# BUG FIX: sklearn's roc_curve signature is roc_curve(y_true, y_score) —
# the true labels must be the FIRST argument. The original call passed the
# predictions first, swapping the roles of labels and scores.
fpr2, tpr2, _ = roc_curve(test_class_set, predictions_RF)

# Area under the ROC curve from the (fpr, tpr) points above.
auc_rf = auc(fpr2, tpr2)

# Uncomment to save your model as a pickle object!
# joblib.dump(fit_RF, 'pickle_models/model_rf.pkl')

if __name__=='__main__':
	# Print model parameters
	print(fit_RF)

	# Text report of feature importances, then the corresponding plot
	# (importances sorted descending for the plot variant).
	hf.variable_importance(import_rf, ind_rf)
	
	hf.variable_importance_plot(import_rf_desc, ind_rf)

	print('''
	############################################
	##      HYPERPARAMETER OPTIMIZATION       ##
	############################################
	'''
	)
	
	print("Note: Remove commented code to see this section")
	# NOTE(review): the backslash continuations below run INSIDE the string
	# literal, so the leading tabs of the continued lines become part of the
	# printed text. The literal is left byte-identical on purpose.
	print("chosen parameters: {'bootstrap': True, 'criterion': 'entropy', \
	'max_depth': 4}\
	 	\nElapsed time of optimization: 189.949 seconds")
	
	
# Example #3
# 0
# Build the random forest with a fixed seed for reproducibility.
fit_rf = RandomForestClassifier(random_state=42)

# Best hyperparameters found by the earlier grid search, applied via
# set_params exactly as the original did — gathered here for readability.
_best_params = {
    'criterion': 'gini',
    'max_features': 'log2',
    'max_depth': 3,
    'n_estimators': 400,
}
fit_rf.set_params(**_best_params)

# Train the model on the training split.
fit_rf.fit(training_set, class_set)

# Tree Specific -------------------------------------------------

# The helper returns a dict; unpack its importance values and their
# corresponding indices for later reporting.
var_imp_rf = hf.variable_importance(fit_rf)
importances_rf = var_imp_rf['importance']
indices_rf = var_imp_rf['index']

if __name__ == '__main__':
    # Print model parameters ------------------------------------
    print(fit_rf, '\n')

    # Initialize function for metrics ---------------------------
    # produce_model_metrics returns a dict for the 'rf' model; the visible
    # lines read only its 'predictions' and 'accuracy' entries.
    # NOTE(review): this block appears truncated here — the guard likely
    # continues past the last visible line.
    fit_dict_rf = produce_model_metrics(fit_rf, test_set, test_class_set, 'rf')

    # Extract each piece from dictionary
    predictions_rf = fit_dict_rf['predictions']
    accuracy_rf = fit_dict_rf['accuracy']