def test_neural_network_basic():
    """Run the basic neural-network experiment against the sample data.

    A ``Runner`` is created for the
    ``model/experiment/output/neural_network_basic`` output location using
    the frame from ``load_sample_data_frame()``, the ``'violation'`` label
    (presumably the target column -- confirm against ``Runner``) and the
    ``neural_network_basic`` model. The experiment is then run in
    multiclass mode with predicted probabilities recorded.
    """
    output_path = 'model/experiment/output/neural_network_basic'
    # The trailing ``None`` is the fifth positional Runner argument; its
    # meaning is defined by Runner and is not visible from this function.
    experiment = Runner(
        output_path,
        load_sample_data_frame(),
        'violation',
        neural_network_basic,
        None,
    )
    experiment.run_classification_experiment(
        sample=sample,
        multiclass=True,
        record_predict_proba=True,
    )
Exemplo n.º 2
0
def test_gaussian_naive_bayes_basic():
    """Run the basic Gaussian naive Bayes experiment on the sample data.

    A ``Runner`` is created with the frame from ``load_sample_data_frame()``,
    the ``'violation'`` label (presumably the target column -- confirm
    against ``Runner``) and the ``gaussian_naive_bayes_basic`` model, and a
    multiclass classification experiment is run with predicted
    probabilities recorded.
    """
    # The output location is named after the model under test. It previously
    # pointed at ``complement_naive_bayes_basic``, which looked like a
    # copy-paste slip and would file these results under a different
    # experiment's name.
    output_path = 'model/experiment/output/gaussian_naive_bayes_basic'
    runner = Runner(output_path,
                    load_sample_data_frame(), 'violation',
                    gaussian_naive_bayes_basic, None)
    runner.run_classification_experiment(sample=sample,
                                         multiclass=True,
                                         record_predict_proba=True)
Exemplo n.º 3
0
def build_xgboost_model():
    """Run the XGBoost experiment on the full data and persist the pipeline.

    The experiment uses the frame from ``load_data_frame()`` with the
    ``'violation'`` label, runs in multiclass mode with ``test_size=0.2``
    and recorded probabilities, and afterwards writes ``xgboost_pipeline``
    to ``model/output/xgboost_model.joblib`` via ``joblib.dump``.
    """
    experiment = Runner(
        'model/output/xgboost_model',
        load_data_frame(),
        'violation',
        xgboost_pipeline,
        None,
    )
    experiment.run_classification_experiment(
        sample=sample,
        test_size=0.2,
        multiclass=True,
        record_predict_proba=True,
    )

    # Persist the same pipeline object that was handed to the Runner
    # (presumably fitted in place by the run above -- confirm in Runner).
    artifact_path = 'model/output/xgboost_model.joblib'
    joblib.dump(xgboost_pipeline, artifact_path)
Exemplo n.º 4
0
def build_sgd_huber_loss():
    """Build and persist the two over-sampled SGD models.

    Two experiments are run back to back on ``load_clean_data_frame()`` with
    the ``'arrest'`` label, each over-sampled with a fresh ``SMOTE()``
    instance and with ``cv=None`` and ``n_jobs=1``:

    * ``sgd`` with ``binned_geo_one_hot_data_mapper`` as transformer, after
      which ``pipeline`` is dumped to
      ``model/output/sgd_huber_loss_over_sampled.joblib``;
    * ``sgd_fs`` with ``mapper`` as transformer, after which ``pipeline_fs``
      is dumped to ``model/output/sgd_huber_loss_over_sampled_fs.joblib``.

    NOTE(review): the objects dumped (``pipeline`` / ``pipeline_fs``) are not
    the ones passed to ``Runner`` (``sgd`` / ``sgd_fs``); how they relate is
    defined outside this function -- confirm they wrap the trained models.
    """
    # --- Variant using the binned geo one-hot mapper -----------------------
    experiment = Runner('model/output/sgd_huber_loss_over_sampled',
                        load_clean_data_frame(), 'arrest', sgd)
    experiment.run_classification_experiment(
        sample=sample,
        record_predict_proba=True,
        transformer=binned_geo_one_hot_data_mapper,
        sampling=SMOTE(),
        fit_increment=fit_increment,
        max_iters=max_iters,
        n_jobs=1,
        cv=None,
    )
    joblib.dump(pipeline, 'model/output/sgd_huber_loss_over_sampled.joblib')

    # --- "fs" variant using ``mapper`` -------------------------------------
    fs_experiment = Runner('model/output/sgd_huber_loss_over_sampled_fs',
                           load_clean_data_frame(), 'arrest', sgd_fs)
    fs_experiment.run_classification_experiment(
        sample=sample,
        record_predict_proba=True,
        transformer=mapper,
        sampling=SMOTE(),
        fit_increment=fit_increment,
        max_iters=max_iters,
        n_jobs=1,
        cv=None,
    )
    joblib.dump(pipeline_fs,
                'model/output/sgd_huber_loss_over_sampled_fs.joblib')
Exemplo n.º 5
0
def build_neural_network():
    """Build and persist the two neural-network models.

    Two experiments are run back to back on ``load_clean_data_frame()`` with
    the ``'arrest'`` label, both with ``cv=None`` and ``n_jobs=1``:

    * ``nn`` with ``binned_geo_one_hot_data_mapper`` as transformer, after
      which ``pipeline`` is dumped to
      ``model/output/neural_network_basic.joblib``;
    * ``nn_fs`` with ``mapper`` as transformer, after which ``pipeline_fs``
      is dumped to ``model/output/neural_network_basic_fs.joblib``.

    NOTE(review): the objects dumped (``pipeline`` / ``pipeline_fs``) are not
    the ones passed to ``Runner`` (``nn`` / ``nn_fs``); how they relate is
    defined outside this function -- confirm they wrap the trained models.
    """
    # --- Variant using the binned geo one-hot mapper -----------------------
    experiment = Runner(
        'model/output/neural_network_basic',
        load_clean_data_frame(),
        'arrest',
        nn,
    )
    experiment.run_classification_experiment(
        sample=sample,
        record_predict_proba=True,
        transformer=binned_geo_one_hot_data_mapper,
        fit_increment=fit_increment,
        max_iters=max_iters,
        cv=None,
        n_jobs=1,
    )
    joblib.dump(pipeline, 'model/output/neural_network_basic.joblib')

    # --- "fs" variant using ``mapper`` -------------------------------------
    fs_experiment = Runner(
        'model/output/neural_network_basic_fs',
        load_clean_data_frame(),
        'arrest',
        nn_fs,
    )
    fs_experiment.run_classification_experiment(
        sample=sample,
        record_predict_proba=True,
        transformer=mapper,
        fit_increment=fit_increment,
        max_iters=max_iters,
        cv=None,
        n_jobs=1,
    )
    joblib.dump(pipeline_fs, 'model/output/neural_network_basic_fs.joblib')
Exemplo n.º 6
0
def test_sgd_log_loss():
    """Run the log-loss SGD experiment under four sampling strategies.

    For each variant a new ``SGDClassifier(loss='log')`` is trained through
    a ``Runner`` on ``load_clean_sample_data_frame()`` with the ``'arrest'``
    label, using ``binned_geo_one_hot_data_mapper`` as transformer and
    ``n_jobs=1``. The variants, in order, are: no ``sampling`` argument,
    ``RandomUnderSampler()``, ``SMOTE()`` and ``SMOTEENN()``.
    """
    # (output location, sampler class); ``None`` marks the baseline run.
    variants = (
        ('model/experiment/output/sgd_log_loss_basic',
         None),
        ('model/experiment/output/sgd_log_loss_under_sampled',
         RandomUnderSampler),
        ('model/experiment/output/sgd_log_loss_over_sampled',
         SMOTE),
        ('model/experiment/output/sgd_log_loss_combine_sampled',
         SMOTEENN),
    )

    for output_path, sampler_cls in variants:
        # Data and classifier are rebuilt for every variant so no state is
        # shared between runs.
        experiment = Runner(output_path, load_clean_sample_data_frame(),
                            'arrest', SGDClassifier(loss='log'))
        options = dict(
            sample=sample,
            record_predict_proba=True,
            transformer=binned_geo_one_hot_data_mapper,
            fit_increment=fit_increment,
            max_iters=max_iters,
            n_jobs=1,
        )
        if sampler_cls is not None:
            # The baseline omits ``sampling`` altogether so that the
            # runner's own default applies.
            options['sampling'] = sampler_cls()
        experiment.run_classification_experiment(**options)
def test_gaussian_naive_bayes():
    """Run the Gaussian naive Bayes experiment under four sampling strategies.

    For each variant a new ``GaussianNB()`` is trained through a ``Runner``
    on ``load_clean_sample_data_frame()`` with the ``'arrest'`` label, using
    ``binned_geo_one_hot_data_mapper`` as transformer and ``n_jobs=1``. The
    variants, in order, are: no ``sampling`` argument,
    ``RandomUnderSampler()``, ``SMOTE()`` and ``SMOTEENN()``.
    """
    # (output location, sampler class); ``None`` marks the baseline run.
    variants = (
        ('model/experiment/output/gaussian_naive_bayes_basic',
         None),
        ('model/experiment/output/gaussian_naive_bayes_under_sampled',
         RandomUnderSampler),
        ('model/experiment/output/gaussian_naive_bayes_over_sampled',
         SMOTE),
        ('model/experiment/output/gaussian_naive_bayes_combine_sampled',
         SMOTEENN),
    )

    for output_path, sampler_cls in variants:
        # Data and classifier are rebuilt for every variant so no state is
        # shared between runs.
        experiment = Runner(output_path, load_clean_sample_data_frame(),
                            'arrest', GaussianNB())
        options = dict(
            sample=sample,
            record_predict_proba=True,
            transformer=binned_geo_one_hot_data_mapper,
            fit_increment=fit_increment,
            n_jobs=1,
        )
        if sampler_cls is not None:
            # The baseline omits ``sampling`` altogether so that the
            # runner's own default applies.
            options['sampling'] = sampler_cls()
        experiment.run_classification_experiment(**options)
Exemplo n.º 8
0
def test_neural_network():
    """Run the MLP experiment under four sampling strategies.

    For each variant a new ``MLPClassifier`` with hidden layer sizes
    ``(750, 125)`` and ``verbose=True`` is trained through a ``Runner`` on
    ``load_clean_sample_data_frame()`` with the ``'arrest'`` label, using
    ``binned_geo_one_hot_data_mapper`` as transformer and ``n_jobs=1``. The
    variants, in order, are: no ``sampling`` argument,
    ``RandomUnderSampler()``, ``SMOTE()`` and ``SMOTEENN()``.
    """
    # (output location, sampler class); ``None`` marks the baseline run.
    variants = (
        ('model/experiment/output/neural_network_basic',
         None),
        ('model/experiment/output/neural_network_under_sampled',
         RandomUnderSampler),
        ('model/experiment/output/neural_network_over_sampled',
         SMOTE),
        ('model/experiment/output/neural_network_combine_sampled',
         SMOTEENN),
    )

    for output_path, sampler_cls in variants:
        # Data and network are rebuilt for every variant so no state is
        # shared between runs.
        experiment = Runner(
            output_path,
            load_clean_sample_data_frame(),
            'arrest',
            MLPClassifier(hidden_layer_sizes=(750, 125), verbose=True),
        )
        options = dict(
            sample=sample,
            record_predict_proba=True,
            transformer=binned_geo_one_hot_data_mapper,
            fit_increment=fit_increment,
            max_iters=max_iters,
            n_jobs=1,
        )
        if sampler_cls is not None:
            # The baseline omits ``sampling`` altogether so that the
            # runner's own default applies.
            options['sampling'] = sampler_cls()
        experiment.run_classification_experiment(**options)