def knn_experiment():
    """Train and evaluate k-nearest-neighbour models for several values of k.

    Uses the module-level ``topics`` collection. For each k in
    ``[1, 15, 31, 45, 61]`` a ``knn.MultikNN`` learner is trained on the
    'train' split, its class predictions on the 'test' split are scored
    with ``evaluate.per_topic_results``, and the per-topic results are
    pretty-printed. Once all models have run, the combined results
    (keyed by model name, then by topic) are pretty-printed to
    ``results/knn_final_results.txt``.

    The output directory is created if it does not exist.
    """
    import os

    output_path = 'results/knn_final_results.txt'
    # Make sure the output directory exists *before* the expensive work,
    # so a missing directory cannot discard a finished experiment.
    output_dir = os.path.dirname(output_path)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    K = len(topics)  # K is the number of topics
    dm = data.create_data_manager()
    ordered_topics = dm.order_topics(topics)

    print('loading train data . . .')
    X_train, Y_train = dm.load_data('train')
    Y_train_slice = dm.slice_Y(Y_train, ordered_topics)

    print('loading test data . . .')
    X_test, Y_test = dm.load_data('test')
    Y_gold = dm.slice_Y(Y_test, ordered_topics)

    final_results = {}
    print('iterating over models . . .')
    for k in [1, 15, 31, 45, 61]:  # k is the number of neighbours
        model = '%d-nearest neighbours' % k
        print('now using model %s . . .' % model)

        learner = knn.MultikNN(K, k)
        learner.train(X_train, Y_train_slice)
        Y_pred = learner.batch_predict_classes(X_test)

        results = evaluate.per_topic_results(Y_pred, Y_gold)
        # Pair each topic with its result, in ordered_topics order.
        results_dict = dict(zip(ordered_topics, results))
        pprint(results_dict)
        final_results[model] = results_dict

    print('saving final results . . .')
    with open(output_path, 'w') as f:
        pprint(final_results, stream=f)
def svm_experiment(C, balance=''):
    """Train and evaluate one SVM model per kernel in ``svm_kernels``.

    Uses the module-level ``topics`` collection and iterates over the
    module-level ``svm_kernels`` as ``(model_name, kernel)`` pairs. For
    each pair a ``svm.MultiSVM`` learner is trained on the 'train' split
    (with ``max_per_class=3000``), its class predictions on the 'test'
    split are scored with ``evaluate.per_topic_results``, and the
    per-topic results are pretty-printed. Once all models have run, the
    combined results (keyed by model name, then by topic) are
    pretty-printed to ``results/svm_final_results_C_<C>_<balance>.txt``,
    where ``C`` is formatted with one decimal place.

    The output directory is created if it does not exist.

    Args:
        C: Passed to ``svm.MultiSVM`` and formatted with ``%.1f`` into the
            output file name.
        balance: Passed through to ``learner.train``; use ``''`` (the
            default) if you don't want to balance. Also embedded in the
            output file name.
    """
    import os

    output_path = 'results/svm_final_results_C_%.1f_%s.txt' % (C, balance)
    # Make sure the output directory exists *before* the expensive work,
    # so a missing directory cannot discard a finished experiment.
    output_dir = os.path.dirname(output_path)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    K = len(topics)  # K is the number of topics
    dm = data.create_data_manager()
    ordered_topics = dm.order_topics(topics)

    print('loading train data . . .')
    X_train, Y_train = dm.load_data('train')
    Y_train_slice = dm.slice_Y(Y_train, ordered_topics)

    print('loading test data . . .')
    X_test, Y_test = dm.load_data('test')
    Y_gold = dm.slice_Y(Y_test, ordered_topics)

    final_results = {}
    print('iterating over models . . .')
    for model, kernel in svm_kernels:
        print('now using model %s . . .' % model)

        learner = svm.MultiSVM(K, C, kernel)
        learner.train(X_train, Y_train_slice, ordered_topics,
                      balance=balance,
                      max_per_class=3000)
        Y_pred = learner.batch_predict_classes(X_test)

        results = evaluate.per_topic_results(Y_pred, Y_gold)
        # Pair each topic with its result, in ordered_topics order.
        results_dict = dict(zip(ordered_topics, results))
        pprint(results_dict)
        final_results[model] = results_dict

    print('saving final results . . .')
    with open(output_path, 'w') as f:
        pprint(final_results, stream=f)