示例#1
0
def cobras_parameter_comparison():
    """Schedule a COBRAS parameter sweep and compare the resulting ARIs.

    For every selected ``p`` and each of its ``t`` values in ``test_dict``
    one test is registered on a ``TestCollection`` using a
    "probability_noise_querier" configured with 0.10 and the query budget.
    The collection is then run over SSH and all tests, together with the
    pre-existing "cobras_no_noise" baseline, are compared under the name
    "all_parameter_study".
    """
    print("making tests")
    query_budget = 200

    tests = TestCollection()
    # Candidate ``t`` values per ``p``.  The 0.05 entry is not part of the
    # sweep below; it is kept so the table stays complete.
    test_dict = {
        0.05: [0.96, 0.99],
        0.10: [0.96, 0.99],
        0.15: [0.91, 0.96, 0.99],
        0.20: [0.86, 0.91, 0.96]
    }

    # The baseline is only compared against, never scheduled here.
    test_names = ["cobras_no_noise"]
    # NOTE(review): the baseline label is empty because the previous
    # slice-based derivation (name[12:-16]) produced "" for this name;
    # kept as-is to leave the comparison output unchanged -- confirm
    # whether a real label is wanted.
    line_names = [""]

    for p in [0.10, 0.15, 0.20]:
        for t in test_dict[p]:
            # Build label and test name once so they cannot drift apart
            # and do not depend on the number of digits in the budget.
            line_name = "p{}_t{}".format(p, t)
            test_name = "cobras_0.10_{}_noise_budget{}".format(
                line_name, query_budget)
            test_names.append(test_name)
            line_names.append(line_name)
            tests.add_10_times_10_fold_test(
                test_name,
                "COBRAS",
                # presumably 3 and 7 are the min/max approximation order
                # and ``t`` is used for both thresholds -- verify against
                # cobras_algorithm_settings_to_string.
                cobras_algorithm_settings_to_string(p, 3, 7, t, t, True,
                                                    False),
                Dataset.get_non_face_news_spam_names(),
                "probability_noise_querier",
                probabilistic_noisy_querier_settings_to_string(
                    0.10, query_budget),
                nb_of_runs=1)

    run_tests_over_SSH_on_machines(
        tests, himecs_generate_computer_info(start_index=3, nb_of_machines=2))
    comparison_name = "all_parameter_study"
    calculate_aris_and_compare_for_tests(comparison_name,
                                         test_names,
                                         line_names,
                                         nb_of_cores=24,
                                         query_budget=query_budget,
                                         recalculate=False)
示例#2
0
def NPU_Cosc_VaryingAmountsOfNoise():
    """Schedule NPU_COSC tests for two noise settings and compare their ARIs.

    One test is registered per entry of the noise list (``-1`` and ``0.10``)
    with a query budget of 100, the collection is run over SSH on
    ``MACHINES_TO_USE``, and the scheduled tests are compared under the name
    "NPU_Cosc_varying_amounts_of_noise".
    """
    print("making tests")
    tests = TestCollection()
    query_budget = 100

    # Names are collected while registering the tests so that the comparison
    # always refers to exactly the tests scheduled here, even if the budget
    # or the noise levels are changed later.
    test_names = []
    for noise_percentage in [-1, 0.10]:
        # -1 is spelled "no" in the test name; any other value is used as-is.
        noise_text = str(noise_percentage) if noise_percentage != -1 else "no"
        test_name = "NPU_Cosc_{}_noise_budget{}".format(noise_text,
                                                        query_budget)
        test_names.append(test_name)
        tests.add_10_times_10_fold_test(
            test_name, "NPU_COSC", "no_parameters",
            Dataset.get_non_face_news_spam_names(),
            "probability_noise_querier",
            probabilistic_noisy_querier_settings_to_string(
                noise_percentage, query_budget))

    run_tests_over_SSH_on_machines(tests, MACHINES_TO_USE)
    comparison_name = "NPU_Cosc_varying_amounts_of_noise"
    # No explicit line names are supplied for this comparison.
    line_names = None
    calculate_aris_and_compare_for_tests(comparison_name,
                                         test_names,
                                         line_names,
                                         query_budget=query_budget)
示例#3
0
def run_tests():
    """Run the "nCOBRAS_filtering" experiment and plot it against a reference.

    Builds a ``Dataset`` for every name returned by
    ``Dataset.get_non_face_news_spam_names()``, runs a COBRAS configuration
    with a ``ProbabilisticNoisyQuerierBuilder(0.10, 100)`` through the
    n-times-n-fold harness with ``n=3``, and then plots the result together
    with "noise_robust_cobras" without recalculating ARIs.
    """
    experiment_name = "nCOBRAS_filtering"

    dataset_names = Dataset.get_non_face_news_spam_names()
    datasets = list(map(Dataset, dataset_names))

    clusterer = COBRAS(minimum_approximation_order=3,
                       noise_probability=0.10,
                       maximum_approximation_order=7,
                       certainty_threshold_reuse=0.91)
    querier_builder = ProbabilisticNoisyQuerierBuilder(0.10, 100)
    test_active_clustering_algorithm_n_times_n_fold(experiment_name,
                                                    clusterer,
                                                    querier_builder,
                                                    datasets,
                                                    n=3)

    # Disabled experiments, kept for reference:
    # test_active_clustering_algorithm_n_times_n_fold("NPU-MPCK-means_no_noise", NPU(MyMPCKMeans(max_iter=10, learn_multiple_full_matrices=False)), NoisyQuerierBuilder(0, 100), datasets, nb_cores=3)
    # test_active_clustering_algorithm_n_times_n_fold("NPU-MPCK-means_10%_noise", NPU(MyMPCKMeans(max_iter = 10, learn_multiple_full_matrices=False)), NoisyQuerierBuilder(0.10, 100), datasets, nb_cores=3)
    # test_active_clustering_algorithm("dummy_clusterer1", DummyClusterer(), NoisyQuerierBuilder(0.10, 100), datasets )
    # test_active_clustering_algorithm("dummy_clusterer2", DummyClusterer(), NoisyQuerierBuilder(0.10, 100), datasets)

    # Results to plot: the run above plus the "noise_robust_cobras" results.
    compared_tests = [experiment_name, "noise_robust_cobras"]
    compare_algorithms_and_plot_results_n_times_n_fold(experiment_name,
                                                       compared_tests,
                                                       calculate_aris=False)