示例#1
0
def knn_experiment():
    """Evaluate multi-topic k-nearest-neighbour classifiers for several k.

    Loads the 'train' and 'test' splits through the data manager, then for
    each neighbour count in (1, 15, 31, 45, 61) trains a ``knn.MultikNN``,
    predicts classes for the test set and scores the predictions with
    ``evaluate.per_topic_results``.  The per-topic results of each model
    are pretty-printed as soon as they are available, and the results of
    all models are written to ``results/knn_final_results.txt``.

    Relies on the ``topics`` name from the enclosing scope.  Returns None.
    """
    import os

    results_path = 'results/knn_final_results.txt'

    # Make sure the output directory exists *before* the expensive
    # training loop, so a missing directory cannot cost us the results
    # of a finished run when the file is finally opened.
    results_dir = os.path.dirname(results_path)
    if not os.path.isdir(results_dir):
        os.makedirs(results_dir)

    K = len(topics)
    dm = data.create_data_manager()
    ordered_topics = dm.order_topics(topics)

    print('loading train data . . .')
    X_train, Y_train = dm.load_data('train')
    Y_train_slice = dm.slice_Y(Y_train, ordered_topics)

    print('loading test data . . .')
    X_test, Y_test = dm.load_data('test')
    Y_gold = dm.slice_Y(Y_test, ordered_topics)

    # Maps model name -> {topic: result} for every model evaluated.
    final_results = {}

    print('interating over models . . .')
    for k in [1, 15, 31, 45, 61]:
        model = '%d-nearest neighbours' % k

        print('now using model %s . . .' % model)
        # K being the number of topics, and k being the number of neighbours
        learner = knn.MultikNN(K, k)
        learner.train(X_train, Y_train_slice)
        Y_pred = learner.batch_predict_classes(X_test)

        # Results are paired with topics positionally, in ordered_topics order.
        results = evaluate.per_topic_results(Y_pred, Y_gold)
        results_dict = dict(zip(ordered_topics, results))

        pprint(results_dict)
        final_results[model] = results_dict

    print('saving final results . . .')
    with open(results_path, 'w') as f:
        pprint(final_results, stream=f)
示例#2
0
def svm_experiment(C, balance=''):
    """Evaluate multi-topic SVM classifiers for every configured kernel.

    Loads the 'train' and 'test' splits through the data manager, then for
    each ``(model, kernel)`` pair in ``svm_kernels`` trains an
    ``svm.MultiSVM``, predicts classes for the test set and scores the
    predictions with ``evaluate.per_topic_results``.  The per-topic results
    of each model are pretty-printed as soon as they are available, and the
    results of all models are written to
    ``results/svm_final_results_C_<C>_<balance>.txt``.

    Relies on the ``topics`` and ``svm_kernels`` names from the enclosing
    scope.  Returns None.

    Args:
        C: Value handed to ``svm.MultiSVM`` as its second argument; also
            formatted with ``%.1f`` into the output file name.
        balance: Forwarded to ``learner.train`` as ``balance``; use ``''``
            if no balancing is wanted.  Also embedded in the output file
            name.
    """
    import os

    # NOTE(review): '%.1f' keeps a single decimal, so values of C that
    # round to the same tenth (e.g. 0.01 and 0.04) share one output file
    # and overwrite each other.  Left unchanged in case other tooling
    # depends on this naming scheme.
    results_path = 'results/svm_final_results_C_%.1f_%s.txt' % (C, balance)

    # Make sure the output directory exists *before* the expensive
    # training loop, so a missing directory cannot cost us the results
    # of a finished run when the file is finally opened.
    results_dir = os.path.dirname(results_path)
    if not os.path.isdir(results_dir):
        os.makedirs(results_dir)

    K = len(topics)
    dm = data.create_data_manager()
    ordered_topics = dm.order_topics(topics)

    print('loading train data . . .')
    X_train, Y_train = dm.load_data('train')
    Y_train_slice = dm.slice_Y(Y_train, ordered_topics)

    print('loading test data . . .')
    X_test, Y_test = dm.load_data('test')
    Y_gold = dm.slice_Y(Y_test, ordered_topics)

    # Maps model name -> {topic: result} for every model evaluated.
    final_results = {}

    print('interating over models . . .')
    for model, kernel in svm_kernels:
        print('now using model %s . . .' % model)
        learner = svm.MultiSVM(K, C, kernel)
        learner.train(X_train, Y_train_slice,
                      ordered_topics,
                      balance=balance,  # use balance = '' if don't want to balance
                      max_per_class=3000)
        Y_pred = learner.batch_predict_classes(X_test)

        # Results are paired with topics positionally, in ordered_topics order.
        results = evaluate.per_topic_results(Y_pred, Y_gold)
        results_dict = dict(zip(ordered_topics, results))

        pprint(results_dict)
        final_results[model] = results_dict

    print('saving final results . . .')
    with open(results_path, 'w') as f:
        pprint(final_results, stream=f)