# Ok...
warnings.filterwarnings('ignore')

# Loop over datasets, then over encoders, and finally, over the models
for dataset_name in datasets:
    X, y, fold_count = arff_loader.load(dataset_name)
    non_numeric = list(X.select_dtypes(exclude=[np.number]).columns.values)
    for encoder in encoders:
        print("Encoding:", dataset_name, y.name, encoder.__class__.__name__)
        folds, fit_encoder_time, score_encoder_time = train_encoder(
            X, y, fold_count, encoder)
        for model in models:
            print('Evaluating:', dataset_name, encoder.__class__.__name__,
                  model.__class__.__name__)
            scores, fit_model_time, score_model_time = train_model(
                folds, model)

            # Log into csv
            result = pd.DataFrame([
                dataset_name, y.name, encoder.__class__.__name__,
                model.__class__.__name__, X.shape[1], folds[0][0].shape[1],
                fit_encoder_time, score_encoder_time, fit_model_time,
                score_model_time
            ] + list(scores)).T
            if not os.path.isfile('./output/result.csv'):
                result.to_csv('./output/result.csv',
                              header=[
                                  'dataset', 'target', 'encoder', 'model',
                                  'input_features', 'output_features',
                                  'fit_encoder_time', 'score_encoder_time',
                                  'fit_model_time', 'score_model_time',
Exemplo n.º 2
0
             category_encoders.WOEEncoder()]

# Initialization: remove the result file left over from a previous run so that
# the first row written below creates a fresh file including the header row.
result_path = './output/result.csv'
if os.path.isfile(result_path):
    os.remove(result_path)

# Column names for the result file, in the same order in which the row values
# are assembled inside the loop. The last six names label the entries of
# `scores` (assumes train_model returns exactly six scores in this order --
# TODO confirm against train_model).
result_columns = [
    'dataset', 'target', 'encoder', 'model',
    'input_features', 'output_features',
    'fit_encoder_time', 'score_encoder_time',
    'fit_model_time', 'score_model_time',
    'test_matthews', 'train_matthews',
    'test_auc', 'train_auc',
    'test_brier', 'train_brier',
]

# Loop over datasets, then over encoders, and finally, over the models.
# The encoder is trained once per (dataset, encoder) pair and the resulting
# folds are reused for every model.
for dataset_name in datasets:
    X, y, fold_count = arff_loader.load(dataset_name)
    # Names of the non-numeric columns; not referenced again in this part of the script.
    non_numeric = list(X.select_dtypes(exclude=[np.number]).columns.values)
    for encoder in encoders:
        encoder_name = encoder.__class__.__name__
        print("Encoding:", dataset_name, y.name, encoder_name)
        folds, fit_encoder_time, score_encoder_time = train_encoder(X, y, fold_count, encoder)
        for model in models:
            model_name = model.__class__.__name__
            print('Evaluating:', dataset_name, encoder_name, model_name)
            scores, fit_model_time, score_model_time = train_model(folds, model)

            # Log into csv: one row per (dataset, encoder, model) combination.
            row = [
                dataset_name,
                y.name,
                encoder_name,
                model_name,
                X.shape[1],            # feature count before encoding
                folds[0][0].shape[1],  # column count of the first element of the first fold
                fit_encoder_time,
                score_encoder_time,
                fit_model_time,
                score_model_time,
            ]
            row.extend(scores)
            # A flat list becomes a single column; transposing turns it into a single row.
            result = pd.DataFrame(row).T
            if os.path.isfile(result_path):
                # The file already exists: append the row without repeating the header.
                result.to_csv(result_path, mode='a', header=False, index=False)
            else:
                # First row of this run: create the file and write the header.
                result.to_csv(result_path, header=result_columns, index=False)

print('Finished. The result was stored into ./output/result.csv.')