def get_positive_neighbors_models(in_svm_data_file_template, output_file,
                                  wl_iter_range, k_range=None,
                                  radius_range=None,
                                  override_tergets_function=None):
    """Yield positive-neighbor statistics per (WL iteration, parameter) pair.

    For each ``w`` in ``wl_iter_range`` the data file named by
    ``in_svm_data_file_template.format(w)`` is loaded with
    ``dataset_manager.read_svm_light_bool_data_to_sparse`` and
    ``get_positive_neighbors_counts`` is called once for every value of the
    active parameter range.  Each resulting model is written to
    ``output_file`` as one ``"<dict>,"`` line (flushed immediately so partial
    results survive an interrupted run), printed, and yielded.

    This is a generator function: nothing runs -- not even the argument
    validation or the truncation of ``output_file`` -- until the first item
    is requested.

    Args:
        in_svm_data_file_template: Format string with one positional
            placeholder that receives the WL iteration value.
        output_file: Path of the file that is opened in ``"w"`` mode and
            receives one line per model.
        wl_iter_range: Iterable of WL iteration values.
        k_range: Non-empty sequence of values forwarded as ``k=``.
            Mutually exclusive with ``radius_range``.
        radius_range: Non-empty sequence of values forwarded as ``radius=``.
            Mutually exclusive with ``k_range``.
        override_tergets_function: Optional callable applied to the loaded
            targets; its return value replaces them.  (The misspelled
            parameter name is kept so existing keyword callers still work.)

    Yields:
        dict: ``{"wl_iterations", "avg_count", "avg_prop"}`` plus either
        ``"k"`` or ``"radius"``, depending on which range was supplied.

    Raises:
        ValueError: If neither or both of ``k_range`` and ``radius_range``
            are given (an empty range counts as "not given").
    """
    param_is_k = bool(k_range)
    # Exactly one of the two ranges must be truthy.  An explicit raise is
    # used instead of ``assert`` so the check also runs under ``python -O``.
    if param_is_k == bool(radius_range):
        raise ValueError(
            "exactly one of k_range and radius_range must be provided")

    param_range = k_range if param_is_k else radius_range
    param_key = "k" if param_is_k else "radius"

    with open(output_file, "w") as fl:
        for w in wl_iter_range:
            data_file = in_svm_data_file_template.format(w)
            X, y = dataset_manager.read_svm_light_bool_data_to_sparse(
                data_file)
            if override_tergets_function:
                y = override_tergets_function(y)

            for param in param_range:
                # Only the active parameter is passed; the other stays None.
                _, avg_count, avg_prop = get_positive_neighbors_counts(
                    X, y,
                    k=param if param_is_k else None,
                    radius=None if param_is_k else param)

                model = {
                    "wl_iterations": w,
                    "avg_count": avg_count,
                    "avg_prop": avg_prop,
                }
                model[param_key] = param

                fl.write("{0},\n".format(model))
                fl.flush()
                # Single-argument call form prints identically on Python 2
                # and Python 3.
                print(model)
                yield model
if __name__ == '__main__':
    # Experiment driver: for every WL iteration, increase the number of
    # neighbors until the value returned by PositiveNeighbors.cross_validate
    # reaches the stop threshold, logging each (w, n, prediction) triple to
    # stdout and to the output file.

    # --- Dataset selection: enable exactly one `path` / `wl_props` pair. ---
    # path = "/media/ivan/204C66C84C669874/Uni-Bonn/Thesis/Main/6_Results/svm/mutagenicity/"
    # path = "/media/ivan/204C66C84C669874/Uni-Bonn/Thesis/Main/6_Results/svm/nci_hiv/"
    path = "/home/stud/ivanovi/Thesis/svm/nci_hiv/"

    # wl_props = helpers.svm_light_format_datasets["mutagenicity"]
    # wl_props = helpers.svm_light_format_datasets["nci-hiv"]["A-vs-M"]
    wl_props = helpers.svm_light_format_datasets["nci-hiv"]["AM-vs-I"]
    # wl_props = helpers.svm_light_format_datasets["nci-hiv"]["A-vs-I"]

    output_file = path + "positive_neighbors_AM_vs_I"

    # --- Sweep parameters. ---
    wl_iterations = range(0, 12)     # WL iteration values to evaluate
    neighbor_counts = range(1, 500)  # candidate n_neighbors values, ascending
    folds_count = 10                 # folds passed to cross_validate
    stop_threshold = 0.95            # stop growing n once this is reached

    print("Start")

    with open(output_file, "w") as fl:
        for w in wl_iterations:
            data_file = path + wl_props["file_template"].format(w)
            X, y = dataset_manager.read_svm_light_bool_data_to_sparse(
                data_file)

            # Target remapping depends on the selected dataset:
            # y = np.vectorize(lambda t: 1 if t == 2 else -1)(y)  # Only for A_vs_M
            y = np.vectorize(lambda t: 1 if t == 2 else t)(y)  # Only for AM_vs_I and A_vs_I

            for n in neighbor_counts:
                prediction = PositiveNeighbors.cross_validate(
                    X, y, n_neighbors=n, folds_count=folds_count,
                    approximate=False)

                # Space-separated on stdout, comma-separated in the file.
                print("{0} {1} {2}".format(w, n, prediction))
                fl.write("{0}, {1}, {2}\n".format(w, n, prediction))
                # Flush per row so partial results survive an aborted run.
                fl.flush()

                if prediction >= stop_threshold:
                    break

    print("Done")