def test_neural_network_basic():
    """Run the basic neural-network multiclass experiment on the sample data.

    Trains/evaluates `neural_network_basic` against the 'violation' target,
    recording predicted probabilities; results go under
    model/experiment/output/neural_network_basic.
    """
    # `sample` is a module-level setting — presumably the sampling fraction; confirm at call site.
    experiment = Runner(
        'model/experiment/output/neural_network_basic',
        load_sample_data_frame(),
        'violation',
        neural_network_basic,
        None,
    )
    experiment.run_classification_experiment(
        sample=sample,
        multiclass=True,
        record_predict_proba=True,
    )
def test_gaussian_naive_bayes_basic():
    """Run the basic Gaussian naive Bayes multiclass experiment on the sample data.

    Trains/evaluates `gaussian_naive_bayes_basic` against the 'violation'
    target, recording predicted probabilities.
    """
    # Fix: the output path previously read 'complement_naive_bayes_basic'
    # (copy-paste from a sibling test), which would have written Gaussian NB
    # results into the Complement NB results directory.
    runner = Runner(
        'model/experiment/output/gaussian_naive_bayes_basic',
        load_sample_data_frame(),
        'violation',
        gaussian_naive_bayes_basic,
        None,
    )
    runner.run_classification_experiment(
        sample=sample,
        multiclass=True,
        record_predict_proba=True,
    )
def build_xgboost_model():
    """Train the XGBoost pipeline on the full data set and persist it.

    Runs a multiclass experiment on the 'violation' target with a 20% holdout,
    recording predicted probabilities, then dumps the fitted pipeline to
    model/output/xgboost_model.joblib.
    """
    output_path = 'model/output/xgboost_model'
    Runner(
        output_path,
        load_data_frame(),
        'violation',
        xgboost_pipeline,
        None,
    ).run_classification_experiment(
        sample=sample,
        test_size=0.2,
        multiclass=True,
        record_predict_proba=True,
    )
    # Persist the (now fitted) module-level pipeline next to the experiment output.
    joblib.dump(xgboost_pipeline, output_path + '.joblib')
def build_sgd_huber_loss():
    """Train the SMOTE-over-sampled SGD (huber loss) models and persist them.

    Runs two 'arrest' experiments — one on the full binned/one-hot mapper, one
    on the feature-selected mapper — then dumps the corresponding module-level
    pipelines to model/output/*.joblib.
    """
    # NOTE(review): the objects dumped are the module-level `pipeline` /
    # `pipeline_fs`, not the `sgd` / `sgd_fs` estimators passed to Runner —
    # presumably the estimators are stages of those pipelines; confirm upstream.
    variants = (
        ('model/output/sgd_huber_loss_over_sampled',
         sgd, binned_geo_one_hot_data_mapper, pipeline),
        ('model/output/sgd_huber_loss_over_sampled_fs',
         sgd_fs, mapper, pipeline_fs),
    )
    for output_path, estimator, data_mapper, fitted_pipeline in variants:
        Runner(
            output_path,
            load_clean_data_frame(),
            'arrest',
            estimator,
        ).run_classification_experiment(
            sample=sample,
            record_predict_proba=True,
            transformer=data_mapper,
            sampling=SMOTE(),
            fit_increment=fit_increment,
            max_iters=max_iters,
            n_jobs=1,
            cv=None,
        )
        joblib.dump(fitted_pipeline, output_path + '.joblib')
def build_neural_network():
    """Train the basic neural-network models and persist them.

    Runs two 'arrest' experiments — one on the full binned/one-hot mapper, one
    on the feature-selected mapper — then dumps the corresponding module-level
    pipelines to model/output/*.joblib.
    """
    # NOTE(review): dumps the module-level `pipeline` / `pipeline_fs`, not the
    # `nn` / `nn_fs` estimators handed to Runner — presumably the estimators
    # live inside those pipelines; confirm upstream.
    variants = (
        ('model/output/neural_network_basic',
         nn, binned_geo_one_hot_data_mapper, pipeline),
        ('model/output/neural_network_basic_fs',
         nn_fs, mapper, pipeline_fs),
    )
    for output_path, estimator, data_mapper, fitted_pipeline in variants:
        Runner(
            output_path,
            load_clean_data_frame(),
            'arrest',
            estimator,
        ).run_classification_experiment(
            sample=sample,
            record_predict_proba=True,
            transformer=data_mapper,
            fit_increment=fit_increment,
            max_iters=max_iters,
            cv=None,
            n_jobs=1,
        )
        joblib.dump(fitted_pipeline, output_path + '.joblib')
def test_sgd_log_loss():
    """Run SGD (log loss) 'arrest' experiments across sampling strategies.

    One experiment each for: no resampling, random under-sampling, SMOTE
    over-sampling, and SMOTEENN combined sampling. Each records predicted
    probabilities under model/experiment/output/sgd_log_loss_<variant>.
    """
    # Factories (not instances) so each sampler is constructed per run,
    # matching the original one-call-per-variant sequence.
    variants = (
        ('basic', None),
        ('under_sampled', RandomUnderSampler),
        ('over_sampled', SMOTE),
        ('combine_sampled', SMOTEENN),
    )
    for suffix, sampler_factory in variants:
        runner = Runner(
            'model/experiment/output/sgd_log_loss_' + suffix,
            load_clean_sample_data_frame(),
            'arrest',
            SGDClassifier(loss='log'),
        )
        # Omit `sampling` entirely for the basic run rather than passing None.
        extra = {} if sampler_factory is None else {'sampling': sampler_factory()}
        runner.run_classification_experiment(
            sample=sample,
            record_predict_proba=True,
            transformer=binned_geo_one_hot_data_mapper,
            fit_increment=fit_increment,
            max_iters=max_iters,
            n_jobs=1,
            **extra,
        )
def test_gaussian_naive_bayes():
    """Run Gaussian naive Bayes 'arrest' experiments across sampling strategies.

    One experiment each for: no resampling, random under-sampling, SMOTE
    over-sampling, and SMOTEENN combined sampling. Each records predicted
    probabilities under model/experiment/output/gaussian_naive_bayes_<variant>.
    """
    # Factories (not instances) so each sampler is constructed per run,
    # matching the original one-call-per-variant sequence.
    variants = (
        ('basic', None),
        ('under_sampled', RandomUnderSampler),
        ('over_sampled', SMOTE),
        ('combine_sampled', SMOTEENN),
    )
    for suffix, sampler_factory in variants:
        runner = Runner(
            'model/experiment/output/gaussian_naive_bayes_' + suffix,
            load_clean_sample_data_frame(),
            'arrest',
            GaussianNB(),
        )
        # Omit `sampling` entirely for the basic run rather than passing None.
        extra = {} if sampler_factory is None else {'sampling': sampler_factory()}
        runner.run_classification_experiment(
            sample=sample,
            record_predict_proba=True,
            transformer=binned_geo_one_hot_data_mapper,
            fit_increment=fit_increment,
            n_jobs=1,
            **extra,
        )
def test_neural_network():
    """Run MLP neural-network 'arrest' experiments across sampling strategies.

    One experiment each for: no resampling, random under-sampling, SMOTE
    over-sampling, and SMOTEENN combined sampling. Each records predicted
    probabilities under model/experiment/output/neural_network_<variant>.
    """
    def make_model():
        # Fresh classifier per variant — two hidden layers (750, 125), verbose.
        return MLPClassifier(hidden_layer_sizes=(750, 125,), verbose=True)

    # Factories (not instances) so each sampler is constructed per run,
    # matching the original one-call-per-variant sequence.
    variants = (
        ('basic', None),
        ('under_sampled', RandomUnderSampler),
        ('over_sampled', SMOTE),
        ('combine_sampled', SMOTEENN),
    )
    for suffix, sampler_factory in variants:
        runner = Runner(
            'model/experiment/output/neural_network_' + suffix,
            load_clean_sample_data_frame(),
            'arrest',
            make_model(),
        )
        # Omit `sampling` entirely for the basic run rather than passing None.
        extra = {} if sampler_factory is None else {'sampling': sampler_factory()}
        runner.run_classification_experiment(
            sample=sample,
            record_predict_proba=True,
            transformer=binned_geo_one_hot_data_mapper,
            fit_increment=fit_increment,
            max_iters=max_iters,
            n_jobs=1,
            **extra,
        )