import os


def trainTasks(train_file, tasks, fold_num, output_folder=None, param_file="params.xml"):
    """
    Given a training instance file and a list of tasks, adapt the training
    vectors to fit the tasks and build an SVM model for each of them.
    """
    # read_problem, train, and save_model come from the libsvm/liblinear
    # Python bindings selected via the module-level `classifier` setting.
    global classifier
    if not output_folder:
        output_folder = "models"
    output_folder = os.path.join(output_folder, "fold-{0:02d}".format(fold_num + 1))
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    parameters = readParams(param_file)
    assert len(parameters) == len(tasks), (
        "The parameters file contains parameters for {0} tasks, "
        "whereas there are {1}.".format(len(parameters), len(tasks)))
    model_files = []
    labels, instances = read_problem(train_file)
    for i, task in enumerate(tasks):
        print "---training task {0:03d}/{1:03d}".format(i + 1, len(tasks))
        # Relabel every training instance according to the current task's
        # label mapping.
        new_labels = [int(task[label]) for label in labels]
        params = parameters[i]
        paramstring = ""
        for param, value in params.items():
            paramstring += " {0} {1}".format(param, value)
        # libsvm needs -b 1 at training time to support probability
        # estimates at prediction time.
        if classifier == "libsvm" and "-b" not in params.keys():
            paramstring += " -b 1"
        paramstring += " -q"
        model_file = os.path.join(
            output_folder,
            os.path.basename(train_file) + ".task{0:03d}.model".format(i + 1))
        model = train(new_labels, instances, paramstring)
        save_model(model_file, model)
        model_files.append(model_file)
    return model_files
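
# A minimal usage sketch for trainTasks (hypothetical file names and task
# mappings; assumes params.xml and the global `classifier` are already set
# up by the surrounding pipeline). Each task is a dict mapping every
# original label to a binary +1/-1 label, so each model is trained on a
# one-vs-rest relabeling of the same feature vectors.
def _example_trainTasks():
    tasks = [
        {1: 1, 2: -1, 3: -1},   # task 1: class 1 against the rest
        {1: -1, 2: 1, 3: -1},   # task 2: class 2 against the rest
        {1: -1, 2: -1, 3: 1},   # task 3: class 3 against the rest
    ]
    # Returns one model file per task, e.g.
    # models/fold-01/train.svm.task001.model
    return trainTasks("train.svm", tasks, fold_num=0)
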
def getPredictions(test_file, tasks, model_files, fold_num, debug=False):
    """
    Returns probability values of the +1 class per task, as well as task
    accuracies (classification accuracies per task). Requires a test
    instance file, a list of tasks, and corresponding model files.
    Transforms the vectors to fit the tasks and classifies them against the
    matching SVM model.
    """
    global classifier
    assert len(tasks) == len(model_files), "Not as many model files as tasks"
    labels, instances = read_problem(test_file)
    models = []
    print "---Loading models..."
    for model_file in model_files:
        models.append(load_model(model_file))
    task_accuracies = [0.0 for _ in range(len(tasks))]
    print "---Classifying instances..."
    predictions = []
    if debug:
        debug_files = [
            open(os.path.basename(test_file) +
                 ".fold{0:02d}.task{1:03d}.test".format(fold_num + 1, i + 1), "w")
            for i in range(len(tasks))]
    for label, instance in zip(labels, instances):
        instance_predictions = []
        for i, (model, task) in enumerate(zip(models, tasks)):
            new_label = int(task[label])
            if debug:
                # Write the relabeled instance in sparse libsvm format.
                o_instance = [":".join([str(idx), str(val)])
                              for idx, val in sorted(instance.items())]
                debug_files[i].write("{0} {1}\n".format(new_label, " ".join(o_instance)))
            if classifier == "liblinear":
                # NB: this predict wrapper also returns the model's label
                # order, unlike stock liblinearutil.predict.
                pred_labels, ACC, pred_values, label_order = predict(
                    [new_label], [instance], model)
                assert len(pred_values[0]) == 1
                pred_value = pred_values[0][0]
                # If the label order is reversed, reverse the sign of the
                # distance value.
                if label_order == [-1, 1]:
                    pred_value = -pred_value
                # Normalize the value if it's not a probability value.
                pred_value = normalizePrediction(pred_value)
            elif classifier == "libsvm":
                pred_labels, (ACC, MSE, SCC), pred_values = predict(
                    [new_label], [instance], model, options="-b 1")
                label_order = model.get_labels()
                pred_value = pred_values[0][label_order.index(1)]
            # Add the value to the instance predictions.
            instance_predictions.append(pred_value)
            # Add one if the prediction was accurate.
            if new_label == pred_labels[0]:
                task_accuracies[i] += 1
        predictions.append(instance_predictions)
    if debug:
        for debug_file in debug_files:
            debug_file.close()
    print "---Done."
    task_accuracies = [score / len(instances) for score in task_accuracies]
    return predictions, task_accuracies
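
# `normalizePrediction` is defined elsewhere in this project. As a minimal
# sketch, assuming it squashes a raw liblinear decision value (a signed
# distance to the hyperplane) into (0, 1) so it is comparable to libsvm's
# probability estimates, a hypothetical stand-in could look like this:
import math


def _sigmoidNormalize(pred_value):
    # Logistic squashing of a signed decision value into (0, 1);
    # hypothetical stand-in, not the project's actual normalizePrediction.
    return 1.0 / (1.0 + math.exp(-pred_value))

# End-to-end usage sketch (hypothetical file names; assumes trainTasks was
# run on the same fold with the same task list):
#
#     model_files = trainTasks("train.svm", tasks, fold_num=0)
#     predictions, task_accuracies = getPredictions(
#         "test.svm", tasks, model_files, fold_num=0)
#     # predictions[j][i] is the +1-class value of instance j under task i.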