Exemplo n.º 1
0
def run():
    """Run the full pipeline: load, pre-process, inspect, encode, fit, export.

    Takes no arguments and returns ``None``. Every setting is read from
    names defined outside this function: ``data_dir_root``,
    ``data_train_file``, ``data_test_file``, ``cols_to_consider``,
    ``one_hot_encod_features``, ``features_ordinal_mappings``,
    ``no_enc_features``, ``eval_classifiers`` and
    ``eval_classifiers_params_grid``.

    The two resulting frames are written with ``to_csv(..., index=False)``
    to the relative paths ``test_evaluation_results.csv`` and
    ``test_submission.csv``.

    NOTE(review): the reporting/plotting helpers and ``build_models`` are
    defined elsewhere; presumably they print, draw figures and train
    models -- confirm against their definitions.
    """
    # --- 1. Load the raw train/test tables ---------------------------------
    loader = DataLoader(data_dir_root, data_train_file, data_test_file)
    train_raw, test_raw = loader.load_csv_data()
    loader.print_statistics()

    # --- 2. Clean both tables ----------------------------------------------
    # Alternative column selection left by the original author:
    #   cols_to_consider=raw_train_df.columns[0:-1]
    pre_processor = PreProcessor(
        train_raw,
        test_raw,
        cols_to_consider=cols_to_consider,
        target_feature='SalePrice',
    )
    pre_processor.pre_process_data()

    # --- 3. Reports and plots (same order as before) -----------------------
    print_features_info(
        pre_processor.raw_train_df, pre_processor.clean_train_df)
    plot_target_feature(
        pre_processor.raw_train_df, pre_processor.target_feature)
    plot_features_hist(pre_processor.raw_train_df)
    plot_correlation_numeric_features(pre_processor.clean_train_df)

    # --- 4. Encode features; both splits use identical settings ------------
    def _encode(frame):
        # A new empty list is built on every call so the two invocations
        # never share one ``reg_encoding_features`` object.
        return prepare_data(
            frame,
            class_col=pre_processor.target_feature,
            reg_encoding_features=[],
            one_hot_encoding_features=one_hot_encod_features,
            ordinal_encoding_features=features_ordinal_mappings,
            no_enc_features=no_enc_features,
        )

    features_train, target_train = _encode(pre_processor.clean_train_df)
    features_test, target_test = _encode(pre_processor.clean_test_df)

    # --- 5. Build models and collect predictions ---------------------------
    predictor = Predictor(
        features_train,
        target_train,
        features_test,
        target_test,
        eval_classifiers,
        eval_classifiers_params_grid,
    )
    per_model_preds, combined_pred = predictor.build_models(grid_search=False)

    # --- 6. Export results -------------------------------------------------
    results_df = predictor.save_predictions_to_df(
        per_model_preds, combined_pred)
    kaggle_df = predictor.save_predictions_for_submission(
        results_df, id_col=pre_processor.raw_test_df['Id'])
    results_df.to_csv("test_evaluation_results.csv", index=False)
    kaggle_df.to_csv("test_submission.csv", index=False)