Example #1
    plot_decision_function(X, y, clf, ax[0], '(a)')

    clf = make_pipeline(SMOTE(random_state=0), LinearSVC(random_state=0))
    clf.fit(X, y)
    plot_decision_function(X, y, clf, ax[1], '(b)')
    fig.tight_layout()

    plt.savefig(
        join(analysis_path, 'resampling_decision_function.pdf'),
        bbox_inches='tight',
        pad_inches=0
    )

if __name__ == '__main__':

    data_path, results_path, analysis_path = generate_paths()

    # load datasets
    datasets = load_datasets(data_dir=data_path)

    # load results
    results = []
    for name in RESULTS_NAMES:
        file_path = join(results_path, f'{name}.pkl')
        results.append(pd.read_pickle(file_path))

    # combine and select results
    results = combine_results(*results)
    results = select_results(results, oversamplers_names=OVRS_NAMES, classifiers_names=CLFS_NAMES)

    # datasets description
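
The plot_decision_function helper called in this example is not shown in the excerpt; the following is a minimal sketch of what such a helper typically looks like, assuming a two-feature X and a fitted scikit-learn-style classifier (the grid resolution and styling are illustrative choices, not the original implementation):

import numpy as np

def plot_decision_function(X, y, clf, ax, title):
    # Build a grid covering the data range with a small margin
    xx, yy = np.meshgrid(
        np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, 200),
        np.linspace(X[:, 1].min() - 1, X[:, 1].max() + 1, 200),
    )
    # Predict a class for every grid point and shade the resulting regions
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    ax.contourf(xx, yy, Z, alpha=0.4)
    ax.scatter(X[:, 0], X[:, 1], c=y, edgecolor='k', s=20)
    ax.set_title(title)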
Example #2
        ('KNN', KNeighborsClassifier(), {'n_neighbors': [3, 5, 8]}),
        ('RF', RandomForestClassifier(),
         {'max_depth': [None, 3, 6], 'n_estimators': [50, 100, 200]})
    ],
    'scoring': ['accuracy', 'f1_macro', 'geometric_mean_score_macro'],
    'n_splits': 5,
    'n_runs': 3,
    'rnd_seed': 0,
    'n_jobs': -1
}


if __name__ == '__main__':

    # Extract paths
    data_dir, results_dir, _ = generate_paths()

    # Load datasets
    datasets = load_datasets(data_dir=data_dir)

    # Extract oversamplers
    oversamplers = CONFIG['oversamplers']

    # Generate oversamplers
    for oversampler in oversamplers:

        # Define and fit experiment
        experiment = ImbalancedExperiment(
            oversamplers=[oversampler],
            classifiers=CONFIG['classifiers'],
            scoring=CONFIG['scoring'],
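
The experiment definition above is truncated in this example, but the (name, estimator, param_grid) tuples in CONFIG['classifiers'] follow the standard scikit-learn tuning convention. As a hypothetical illustration (not the internals of ImbalancedExperiment), one such tuple maps directly onto a grid search:

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# One (name, estimator, param_grid) tuple, as in CONFIG['classifiers']
name, estimator, param_grid = (
    'RF',
    RandomForestClassifier(),
    {'max_depth': [None, 3, 6], 'n_estimators': [50, 100, 200]},
)

# Exhaustive search over the grid with 5-fold cross-validation
search = GridSearchCV(estimator, param_grid, scoring='f1_macro',
                      cv=5, n_jobs=-1)
# search.fit(X, y)  # X, y would come from one of the loaded datasets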
Example #3
    ],
    'scoring': ['accuracy', 'f1_macro', 'geometric_mean_score_macro'],
    'n_splits': 5,
    'n_runs': 3,
    'rnd_seed': 0,
    'n_jobs': -1
}

if __name__ == '__main__':

    # Extract paths
    data_path, results_path, _ = generate_paths()

    # Load the LUCAS dataset
    datasets = load_datasets(data_path=data_path, data_type='csv')

    # Extract oversamplers
    oversamplers = CONFIG['oversamplers']

    # Generate oversamplers
    for oversampler in oversamplers:

        # Define and fit experiment
        experiment = ImbalancedExperiment(
            oversamplers=[oversampler],
            classifiers=CONFIG['classifiers'],
            scoring=CONFIG['scoring'],
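
The load_datasets helper is not included in these excerpts; it is called with data_dir in Example #2 and with data_path and data_type='csv' here. A hypothetical sketch of a CSV-reading variant, assuming each file holds one dataset whose last column is the target (the (name, (X, y)) return format is an assumption, not the documented API):

import os
from glob import glob
import pandas as pd

def load_datasets(data_path, data_type='csv'):
    # Read every matching file and expose it as a (name, (X, y)) pair,
    # assuming the target is stored in the last column
    datasets = []
    for file_path in glob(os.path.join(data_path, f'*.{data_type}')):
        name = os.path.splitext(os.path.basename(file_path))[0]
        data = pd.read_csv(file_path)
        X, y = data.iloc[:, :-1], data.iloc[:, -1]
        datasets.append((name, (X, y)))
    return datasets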
Example #4
def createGens(input_example=INPUT_EXAMPLE,
               output_example=OUTPUT_EXAMPLE,
               shape=(256, 256),
               batch_size=32,
               split=0.2):
    '''
        Builds and returns the training and validation generators.
    '''
    input_root, input_suffix, output_root, output_suffix, output_separator = generate_paths(
        input_example, output_example)

    print("Input: ", input_root)
    print("Output root: ", output_root)

    image_paths = glob(os.path.join(input_root, "*" + input_suffix))
    print("-- #Image_paths: ", len(image_paths))
    mask_paths = glob(os.path.join(output_root, "*" + output_suffix))
    print("-- #Mask_paths: ", len(mask_paths))

    if MASK_PATHS != "":  # if a dedicated mask list is specified, use it instead
        with open(MASK_PATHS, 'r') as f:
            dedicated_mask_paths = json.load(f)
        attribute = OUTPUT_EXAMPLE.split(output_separator)[-1].split('.')[0]
        mask_paths = dedicated_mask_paths[attribute]
        print(" -- However, only", len(mask_paths), "dedicated mask paths will be used")

    if PREPROPROCESSING_MASK_EXAMPLE:
        # TODO: only the preprocessing output root and suffix are needed here
        _, _, preprocessing_root, preprocessing_suffix, _ = generate_paths(
            INPUT_EXAMPLE, PREPROPROCESSING_MASK_EXAMPLE)
        preprocessing_paths = glob(
            os.path.join(preprocessing_root, "*" + preprocessing_suffix))
        preprocessing_ids = [
            path.split('/')[-1].split(preprocessing_suffix)[0]
            for path in preprocessing_paths
        ]
    else:
        preprocessing_paths = []

    # Not all images have a corresponding mask, so keep only the ids that
    # appear in every path list (the suffix is whatever follows the id).
    image_ids = [path.split('/')[-1].split(input_suffix)[0]
                 for path in image_paths]
    mask_ids = [path.split('/')[-1].split(output_suffix)[0]
                for path in mask_paths]
    if preprocessing_paths:
        intersection = list(set(image_ids) & set(mask_ids) &
                            set(preprocessing_ids))
        preprocessing_paths = [
            os.path.join(preprocessing_root, sample_id + preprocessing_suffix)
            for sample_id in intersection
        ]
    else:
        intersection = list(set(image_ids) & set(mask_ids))

    image_paths = [os.path.join(input_root, sample_id + input_suffix)
                   for sample_id in intersection]
    mask_paths = [os.path.join(output_root, sample_id + output_suffix)
                  for sample_id in intersection]

    # Use one split index for both slices so every sample lands in exactly
    # one of the two sets.
    dataset_len = len(image_paths)
    split_idx = floor(dataset_len * (1 - split))

    train_image_paths = image_paths[:split_idx]
    train_mask_paths = mask_paths[:split_idx]
    train_preprocessing_paths = (preprocessing_paths[:split_idx]
                                 if preprocessing_paths else [])

    validation_image_paths = image_paths[split_idx:]
    validation_mask_paths = mask_paths[split_idx:]
    validation_preprocessing_paths = (preprocessing_paths[split_idx:]
                                      if preprocessing_paths else [])

    train_generator = Generator(train_image_paths, train_mask_paths, shape,
                                batch_size, train_preprocessing_paths)
    validation_generator = Generator(validation_image_paths,
                                     validation_mask_paths, shape,
                                     batch_size,
                                     validation_preprocessing_paths)
    return train_generator, validation_generator
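
A hypothetical usage sketch, assuming Generator behaves like a keras.utils.Sequence (indexable by batch); the printed shapes depend on the data on disk:

# Build the generators with an 80/20 train/validation split
train_gen, val_gen = createGens(shape=(256, 256), batch_size=32, split=0.2)

# Pull the first batch to sanity-check shapes (assumes Sequence-style indexing)
images, masks = train_gen[0]
print(images.shape, masks.shape)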