Example #1
def simple_cobras_tests():
    # this class contains the logic to build different kinds of test cases
    # if you want another testing scenario you can add it to this class
    tests = TestCollection()
    tests.add_10_times_10_fold_test(
        "<TEST NAME>",
        "COBRAS",
        cobras_algorithm_settings_to_string(0.10, 3, 7, 0.91, 0.91, False,
                                            False),
        Dataset.get_dataset_names(),
        "probability_noise_querier",  # name of the querier
        probabilistic_noisy_querier_settings_to_string(
            0, 200))  # noise probability 0 --> no noise
    # this runs the tests locally over the number of cores specified
    run_tests_local(tests, nb_of_cores=4)

    # after running several of the above you can compare different results as follows
    comparison_name = "NAME OF THE COMPARISON"
    test_names = [
        "<TEST NAME>", "<OTHER TEST NAME>"
    ]  # these should be the same strings as the first arguments of tests.add_10_times_10_fold_test
    line_names = [
        "<simple name for <TEST NAME>>", "<OTHER SIMPLE NAME>"
    ]  # these names are displayed in the legend of the plots instead of the test names (test names should be unique and can thus become very long)
    # this will calculate all the ARIs and compare the tests; this is not possible over SSH, but it is not much work anyway
    calculate_aris_and_compare_for_tests(comparison_name,
                                         test_names,
                                         line_names,
                                         query_budget=200,
                                         nb_of_cores=4)
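
For reference, the positional settings in the call above can be restated with the keyword names that appear in Example #5 below. This mapping is an assumption based on that example alone, not on separate documentation of cobras_algorithm_settings_to_string:

# Hedged sketch: the same values as in simple_cobras_tests, spelled out with the
# keyword names used in Example #5; the positional-to-keyword mapping is assumed.
cobras_settings = cobras_algorithm_settings_to_string(
    0.10,                   # noise estimate (p_noise), first positional argument
    min_approx_order=3,
    max_approx_order=7,
    keep_threshold=0.91,
    reuse_threshold=0.91,
    correct_noise=False,    # plain COBRAS here, so no noise correction
    use_all_cycles=False)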
Example #2
def generate_folds_for_dataset():
    dataset_names = Dataset.get_dataset_names() + Dataset.interesting_2d_datasets()

    for dataset_name in dataset_names:

        dataset = Dataset(dataset_name)
        print("making folds for dataset ", dataset_name)
        os.makedirs(os.path.join(FOLD_PATH, dataset_name), exist_ok=True)
        for run_nb in range(10):
            # toon's code
            # skf = cross_validation.StratifiedKFold(labels, n_folds=5, shuffle=True)
            skf = StratifiedKFold(n_splits=10, shuffle=True)
            # skf = KFold(n_splits=10, shuffle=True)
            labels = dataset.target

            for fold_nb, (train_indices, test_indices) in enumerate(skf.split(np.zeros(len(labels)), labels)):

                to_write = dict()
                to_write["train_indices"] = train_indices.tolist()
                to_write["test_indices"] = test_indices.tolist()
                fold_file_path = os.path.join(FOLD_PATH, dataset_name, "run{}_fold{}.txt".format(run_nb, fold_nb))
                if os.path.isfile(fold_file_path):
                    print("fold file already exists! not overwriting!")
                    continue
                with open(fold_file_path, mode='w') as fold_file:
                    json.dump(to_write, fold_file)
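
The fold files written above are plain JSON objects with "train_indices" and "test_indices" lists, so reading one back is straightforward. The helper below is a minimal sketch (load_fold is a hypothetical name, not part of the original code) that assumes the run{}_fold{}.txt layout used by generate_folds_for_dataset:

import json
import os

def load_fold(fold_path, dataset_name, run_nb, fold_nb):
    # Read one fold file produced by generate_folds_for_dataset and return
    # the train/test index lists stored in it.
    file_name = "run{}_fold{}.txt".format(run_nb, fold_nb)
    with open(os.path.join(fold_path, dataset_name, file_name)) as fold_file:
        fold = json.load(fold_file)
    return fold["train_indices"], fold["test_indices"]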
Example #3
def ncobras_noise_comparison_fixed_noise_changing_p_noise():
    print("making tests")
    tests = TestCollection()

    tests.add_10_times_10_fold_test(
        "ncobras_0.10_noise_0.05_p_noise", "COBRAS",
        cobras_algorithm_settings_to_string(0.05, 3, 7, 0.96, 0.96,
                                            True, False),
        Dataset.get_dataset_names(), "probability_noise_querier",
        probabilistic_noisy_querier_settings_to_string(0.10, 250))
    tests.add_10_times_10_fold_test(
        "ncobras_0.10_noise_0.10_p_noise", "COBRAS",
        cobras_algorithm_settings_to_string(0.10, 3, 7, 0.91, 0.91,
                                            True, False),
        Dataset.get_dataset_names(), "probability_noise_querier",
        probabilistic_noisy_querier_settings_to_string(0.10, 250))
    tests.add_10_times_10_fold_test(
        "ncobras_0.10_noise_0.15_p_noise", "COBRAS",
        cobras_algorithm_settings_to_string(0.15, 3, 7, 0.91, 0.91,
                                            True, False),
        Dataset.get_dataset_names(), "probability_noise_querier",
        probabilistic_noisy_querier_settings_to_string(0.10, 250))
    tests.add_10_times_10_fold_test(
        "ncobras_0.10_noise_0.20_p_noise", "COBRAS",
        cobras_algorithm_settings_to_string(0.20, 3, 7, 0.91, 0.91,
                                            True, False),
        Dataset.get_dataset_names(), "probability_noise_querier",
        probabilistic_noisy_querier_settings_to_string(0.10, 250))
    run_tests_over_SSH_on_machines(
        tests, generate_computer_info(start_index=21, nb_of_machines=5))
    comparison_name = "ncobras_parameter_sensitivity"
    test_names = [
        "ncobras_0.10_noise_0.05_p_noise", "ncobras_0.10_noise_0.10_p_noise",
        "ncobras_0.10_noise_0.15_p_noise", "ncobras_0.10_noise_0.20_p_noise"
    ]

    line_names = None
    calculate_aris_and_compare_for_tests(comparison_name, test_names,
                                         line_names)
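
The four test definitions above differ only in the noise estimate passed to cobras_algorithm_settings_to_string (and in the thresholds for the 0.05 case), so the same collection can also be built in a loop. A sketch under the same assumptions as the code above, i.e. the helper functions and Dataset come from the surrounding project:

tests = TestCollection()
for p_noise_text in ["0.05", "0.10", "0.15", "0.20"]:
    p_noise = float(p_noise_text)
    # the 0.05 test above uses 0.96 for both thresholds, the others use 0.91
    threshold = 0.96 if p_noise == 0.05 else 0.91
    tests.add_10_times_10_fold_test(
        "ncobras_0.10_noise_{}_p_noise".format(p_noise_text), "COBRAS",
        cobras_algorithm_settings_to_string(p_noise, 3, 7, threshold, threshold,
                                            True, False),
        Dataset.get_dataset_names(), "probability_noise_querier",
        probabilistic_noisy_querier_settings_to_string(0.10, 250))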
Example #4
def ncobras_plus_varying_amounts_of_noise():
    print("making tests")
    tests = TestCollection()
    query_budget = 200
    for noise_percentage in [-1, 0.05, 0.10]:
        noise_text = str(noise_percentage) if noise_percentage != -1 else "no"
        threshold = 0.95
        noise_percentage_to_use = noise_percentage if noise_percentage > 0 else 0.10
        tests.add_10_times_10_fold_test(
            "NCOBRASplus_{}_noise_budget{}_pnoise{}_threshold{}".format(
                noise_text, query_budget, noise_percentage_to_use, threshold),
            "COBRAS",
            cobras_algorithm_settings_to_string(noise_percentage_to_use, 3, 10,
                                                threshold, threshold, True,
                                                False),
            Dataset.get_dataset_names(),
            "probability_noise_querier",
            probabilistic_noisy_querier_settings_to_string(
                noise_percentage, query_budget),
            nb_of_runs=10)
    run_tests_over_SSH_on_machines(tests, MACHINES_TO_USE)
Example #5
def ncobras_plus_runtime_test():
    tests = TestCollection()
    query_budget = 100
    noise_percentage = 0.05
    threshold = 0.95
    tests.add_10_times_10_fold_test(
        "NCOBRASplus_{}_noise_budget{}_pnoise{}_threshold{}_runtimes".format(
            noise_percentage, query_budget, noise_percentage, threshold),
        "COBRAS",
        cobras_algorithm_settings_to_string(noise_percentage,
                                            min_approx_order=3,
                                            max_approx_order=10,
                                            keep_threshold=threshold,
                                            reuse_threshold=threshold,
                                            correct_noise=True,
                                            use_all_cycles=False),
        Dataset.get_dataset_names(),
        "probability_noise_querier",
        probabilistic_noisy_querier_settings_to_string(noise_percentage,
                                                       query_budget),
        nb_of_runs=10)
    run_tests_over_SSH_on_machines(tests, MACHINES_TO_USE)
Example #6
def cobras_VaryingAmountsOfNoise():
    print("making tests")
    tests = TestCollection()
    query_budget = 200
    for noise_percentage in [-1, 0.05, 0.10, 0.20]:
        noise_text = str(noise_percentage) if noise_percentage != -1 else "no"
        tests.add_10_times_10_fold_test(
            "cobras_{}_noise_budget{}".format(noise_text, query_budget),
            "COBRAS",
            cobras_algorithm_settings_to_string(0.10, 3, 7, 0.91, 0.91, False,
                                                False),
            Dataset.get_dataset_names(), "probability_noise_querier",
            probabilistic_noisy_querier_settings_to_string(
                noise_percentage, query_budget))
    run_tests_over_SSH_on_machines(tests, MACHINES_TO_USE)
    comparison_name = "cobras_varying_amounts_of_noise"
    test_names = [
        "cobras_{}_noise_budget200".format(i)
        for i in ["no", 0.05, 0.10, 0.20]
    ]
    line_names = None
    calculate_aris_and_compare_for_tests(comparison_name, test_names,
                                         line_names)
Example #7
def calculate_all_W_matrices(nb_of_cores=8):
    tasks = []
    for dataset_name in Dataset.get_dataset_names():
        tasks.append(CalculateWMatrix(dataset_name))
    run_tests_from_generator(tasks, nb_of_cores=nb_of_cores)