def crossval_multilabel_dataset(path_to_data, examples_count, folds_count,
                                wl_iter_range, k_L_range,
                                prediction_threshold_range, output_dir,
                                window_size=None):
    """Grid-run multilabel d-fold cross-validation and print each result.

    For every (WL iteration count, prediction threshold) pair the matching
    data file is read, ``crossval.d_fold_crossval`` is invoked in multilabel
    mode, and the model it returns is printed. Nothing is returned.

    NOTE(review): a second definition with the same name and signature
    appears later in this file and replaces this one when the module loads.

    Args:
        path_to_data: Prefix joined by plain string concatenation with the
            file name ``multilabel_svm_light_data_wl_<wl_iterations>``.
        examples_count: Forwarded unchanged to ``crossval.d_fold_crossval``.
        folds_count: Forwarded unchanged to ``crossval.d_fold_crossval``.
        wl_iter_range: Iterable of WL iteration counts; each selects a file.
        k_L_range: Forwarded unchanged to ``crossval.d_fold_crossval``.
        prediction_threshold_range: Iterable of thresholds; re-iterated for
            every element of ``wl_iter_range``.
        output_dir: Forwarded unchanged to ``crossval.d_fold_crossval``.
        window_size: When truthy, recorded in the base model under ``"w"``.
    """
    file_template = path_to_data + "multilabel_svm_light_data_wl_{0}"

    for wl_iterations in wl_iter_range:
        for threshold in prediction_threshold_range:
            # The file is re-read for every threshold.
            # NOTE(review): presumably the reader yields a one-shot iterator;
            # confirm before hoisting this out of the inner loop.
            examples = dataset_manager.read_svm_light_bool_data(
                file_template.format(wl_iterations))

            model_template = dict(wl_iterations=wl_iterations,
                                  pred_threshold=threshold)
            if window_size:
                model_template["w"] = window_size

            winner = crossval.d_fold_crossval(
                examples,
                examples_count,
                folds_count,
                k_L_range,
                output_dir,
                base_model=model_template,
                multilabel=True,
                multilabel_prediction_threshold=threshold,
            )

            # One parenthesised argument prints the same text whether
            # ``print`` is a statement or a function.
            print("%s %s" % ("Best model:", winner))
def prepare_target_with_predictions(svm_light_val_file, predictions_file):
    """Pair the true targets of a validation set with predicted values.

    Args:
        svm_light_val_file: Path handed to
            ``dataset_manager.read_svm_light_bool_data``; element ``[0]`` of
            every record it yields is taken as the real target.
        predictions_file: Path to a text file in which every line parses as
            a float.

    Returns:
        A ``(real_targets, pred_targets)`` tuple of ``imap`` iterators. Both
        are lazy, so a malformed prediction line raises ``ValueError`` only
        when ``pred_targets`` is consumed.
    """
    val_data = dataset_manager.read_svm_light_bool_data(svm_light_val_file)

    # readlines() loads every line eagerly, so the handle can be closed
    # before returning instead of being leaked as it was previously.
    with open(predictions_file) as pred_f:
        pred_lines = pred_f.readlines()

    real_targets = imap(lambda x: x[0], val_data)
    pred_targets = imap(float, pred_lines)
    return real_targets, pred_targets
def crossval_multilabel_dataset(path_to_data, examples_count, folds_count,
                                wl_iter_range, k_L_range,
                                prediction_threshold_range, output_dir,
                                window_size=None):
    """Cross-validate a multilabel dataset over WL iterations and thresholds.

    Each combination of ``wl_iter_range`` and ``prediction_threshold_range``
    triggers one ``crossval.d_fold_crossval`` run in multilabel mode on the
    data file for that WL iteration count; the returned model is printed.
    The function itself returns nothing.

    NOTE(review): an earlier definition with the same name and signature
    exists in this file; this later one is the binding that takes effect.

    Args:
        path_to_data: String prefix concatenated directly with
            ``multilabel_svm_light_data_wl_<wl_iterations>``.
        examples_count: Passed straight to ``crossval.d_fold_crossval``.
        folds_count: Passed straight to ``crossval.d_fold_crossval``.
        wl_iter_range: Iterable of WL iteration counts (outer loop).
        k_L_range: Passed straight to ``crossval.d_fold_crossval``.
        prediction_threshold_range: Iterable of thresholds (inner loop).
        output_dir: Passed straight to ``crossval.d_fold_crossval``.
        window_size: Stored as ``base_model["w"]`` only when truthy.
    """
    name_pattern = path_to_data + "multilabel_svm_light_data_wl_{0}"

    for wl_iterations in wl_iter_range:
        for prediction_threshold in prediction_threshold_range:
            # Loaded anew for each threshold.
            # NOTE(review): the reader may return a single-pass iterator;
            # verify before sharing one load across thresholds.
            source_path = name_pattern.format(wl_iterations)
            dataset = dataset_manager.read_svm_light_bool_data(source_path)

            base_model = {}
            base_model["wl_iterations"] = wl_iterations
            base_model["pred_threshold"] = prediction_threshold
            if window_size:
                base_model["w"] = window_size

            keyword_options = {
                "base_model": base_model,
                "multilabel": True,
                "multilabel_prediction_threshold": prediction_threshold,
            }
            chosen_model = crossval.d_fold_crossval(
                dataset, examples_count, folds_count, k_L_range, output_dir,
                **keyword_options)

            # A single pre-formatted argument keeps the printed line the
            # same under both the print statement and the print function.
            print("%s %s" % ("Best model:", chosen_model))
def crossval_big_dataset():
    """Cross-validate the hard-coded A_vs_M dataset for each WL iteration.

    For every value in ``wl_iter_range`` the matching ``svm_light_data_wl_*``
    file is read, its targets are remapped to +1/-1, and
    ``crossval.d_fold_crossval`` is run; the returned model is printed.
    Nothing is returned.

    NOTE(review): ``wl_iter_range``, ``k_L_range`` and ``output_dir`` are not
    defined inside this function and are resolved as globals. Confirm they
    exist at module level.
    """
    def _relabel(example):
        # TODO: only for A_vs_M
        # Target 2 becomes +1; every other target becomes -1.
        return (1 if example[0] == 2 else -1, example[1])

    path_to_data = "/media/ivan/204C66C84C669874/Uni-Bonn/Thesis/Main/6_Results/svm/nci_hiv/data/A_vs_M/"
    file_template = path_to_data + "svm_light_data_wl_{0}"
    examples_count = 1503
    folds_count = 10

    for wl_iterations in wl_iter_range:
        raw_examples = dataset_manager.read_svm_light_bool_data(
            file_template.format(wl_iterations))
        binary_examples = imap(_relabel, raw_examples)

        model_template = {"wl_iterations": wl_iterations}
        winner = crossval.d_fold_crossval(
            binary_examples,
            examples_count,
            folds_count,
            k_L_range,
            output_dir,
            base_model=model_template,
        )

        # One parenthesised argument prints the same text whether ``print``
        # is a statement or a function.
        print("%s %s" % ("Best model:", winner))