def cobras_parameter_comparison():
    """Register a COBRAS parameter sweep, run it over SSH and compare ARIs.

    One test is registered for every ``p`` in 0.10, 0.15 and 0.20 combined
    with every ``t`` listed for that ``p``. ``p`` is passed as the first
    argument of ``cobras_algorithm_settings_to_string`` and ``t`` as both the
    fourth and fifth argument. Every test uses the
    "probability_noise_querier" configured with 0.10 and a budget of 200.

    The final comparison also includes "cobras_no_noise", which is not
    registered by this function.
    """
    print("making tests")
    query_budget = 200
    name_template = "cobras_0.10_p{}_t{}_noise_budget{}"

    # NOTE(review): p presumably is the noise probability assumed by COBRAS
    # and t a certainty threshold -- confirm against
    # cobras_algorithm_settings_to_string.
    t_values_per_p = {
        0.05: [0.96, 0.99],  # present in the table but not swept below
        0.10: [0.96, 0.99],
        0.15: [0.91, 0.96, 0.99],
        0.20: [0.86, 0.91, 0.96],
    }
    swept_p_values = [0.10, 0.15, 0.20]

    tests = TestCollection()
    test_names = ["cobras_no_noise"]
    for p in swept_p_values:
        for t in t_values_per_p[p]:
            current_name = name_template.format(p, t, query_budget)
            test_names.append(current_name)

            algorithm_settings = cobras_algorithm_settings_to_string(
                p, 3, 7, t, t, True, False)
            dataset_names = Dataset.get_non_face_news_spam_names()
            querier_settings = probabilistic_noisy_querier_settings_to_string(
                0.10, query_budget)
            tests.add_10_times_10_fold_test(
                current_name,
                "COBRAS",
                algorithm_settings,
                dataset_names,
                "probability_noise_querier",
                querier_settings,
                nb_of_runs=1,
            )

    machines = himecs_generate_computer_info(start_index=3, nb_of_machines=2)
    run_tests_over_SSH_on_machines(tests, machines)

    # Dropping the first 12 and the last 16 characters strips
    # "cobras_0.10_" and "_noise_budget200", leaving "p<p>_t<t>".
    # NOTE(review): for "cobras_no_noise" (15 characters) this slice yields
    # an empty string, so that entry gets an empty line name -- confirm this
    # is intended.
    line_names = [name[12:-16] for name in test_names]

    calculate_aris_and_compare_for_tests(
        "all_parameter_study",
        test_names,
        line_names,
        nb_of_cores=24,
        query_budget=query_budget,
        recalculate=False,
    )
def NPU_Cosc_VaryingAmountsOfNoise():
    """Register NPU_COSC tests for two noise settings, run and compare them.

    Two tests are registered with a query budget of 100: one with a noise
    percentage of -1 (named "no" noise in the test name) and one with 0.10.
    The tests are run on ``MACHINES_TO_USE`` and afterwards compared under
    the name "NPU_Cosc_varying_amounts_of_noise".
    """
    print("making tests")
    tests = TestCollection()
    query_budget = 100

    for noise_percentage in [-1, 0.10]:
        # -1 is the marker used here for the run labelled "no" noise.
        if noise_percentage != -1:
            noise_text = str(noise_percentage)
        else:
            noise_text = "no"

        current_name = "NPU_Cosc_{}_noise_budget{}".format(
            noise_text, query_budget)
        dataset_names = Dataset.get_non_face_news_spam_names()
        querier_settings = probabilistic_noisy_querier_settings_to_string(
            noise_percentage, query_budget)
        tests.add_10_times_10_fold_test(
            current_name,
            "NPU_COSC",
            "no_parameters",
            dataset_names,
            "probability_noise_querier",
            querier_settings,
        )

    run_tests_over_SSH_on_machines(tests, MACHINES_TO_USE)

    # These are the names the loop above produces (str(0.10) is "0.1").
    test_names = [
        "NPU_Cosc_no_noise_budget100",
        "NPU_Cosc_0.1_noise_budget100",
    ]
    calculate_aris_and_compare_for_tests(
        "NPU_Cosc_varying_amounts_of_noise",
        test_names,
        None,  # no explicit line names are supplied
        query_budget=query_budget,
    )
def run_tests():
    """Run the "nCOBRAS_filtering" experiment and compare it with a second run.

    Builds a ``Dataset`` for every name returned by
    ``Dataset.get_non_face_news_spam_names()``, runs a COBRAS instance on
    them with a ``ProbabilisticNoisyQuerierBuilder(0.10, 100)`` and ``n=3``,
    and then compares "nCOBRAS_filtering" with "noise_robust_cobras" without
    recalculating ARIs (``calculate_aris=False``).
    """
    dataset_names = Dataset.get_non_face_news_spam_names()
    datasets = [Dataset(dataset_name) for dataset_name in dataset_names]

    clusterer = COBRAS(
        minimum_approximation_order=3,
        noise_probability=0.10,
        maximum_approximation_order=7,
        certainty_threshold_reuse=0.91,
    )
    querier_builder = ProbabilisticNoisyQuerierBuilder(0.10, 100)
    test_active_clustering_algorithm_n_times_n_fold(
        "nCOBRAS_filtering",
        clusterer,
        querier_builder,
        datasets,
        n=3,
    )

    # Disabled experiments, kept for reference:
    # test_active_clustering_algorithm_n_times_n_fold("NPU-MPCK-means_no_noise", NPU(MyMPCKMeans(max_iter=10, learn_multiple_full_matrices=False)), NoisyQuerierBuilder(0, 100), datasets, nb_cores=3)
    # test_active_clustering_algorithm_n_times_n_fold("NPU-MPCK-means_10%_noise", NPU(MyMPCKMeans(max_iter = 10, learn_multiple_full_matrices=False)), NoisyQuerierBuilder(0.10, 100), datasets, nb_cores=3)
    # test_active_clustering_algorithm("dummy_clusterer1", DummyClusterer(), NoisyQuerierBuilder(0.10, 100), datasets )
    # test_active_clustering_algorithm("dummy_clusterer2", DummyClusterer(), NoisyQuerierBuilder(0.10, 100), datasets)

    compared_tests = ["nCOBRAS_filtering", "noise_robust_cobras"]
    compare_algorithms_and_plot_results_n_times_n_fold(
        "nCOBRAS_filtering",
        compared_tests,
        calculate_aris=False,
    )