Пример #1
0
import Parser, scikit_classifier

if __name__ == '__main__':
    # Exhaustive grid search over classifier settings: every combination of
    # the value lists below (3 * 1 * 4 * 6 * 1 * 1 = 72 configurations) gets
    # its own freshly built and trained classifier.
    # takes forever to run
    # `f` is the search-result file; the code that writes to it is outside
    # this excerpt.
    with open('data/output/searchresult.txt', 'w') as f:
        for ngram_size in [0, 1, 2]:
            # Single-element lists (here and for use_lesk / use_lesk_words)
            # pin that setting to one value while keeping the loop structure.
            for use_syntactic_features in [0]:
                for pos_window_size in [0, 1, 5, 10]:
                    for window_size in [0, 5, 10, 50, 100, 500]:
                        for use_lesk in [False]:
                            for use_lesk_words in [False]:
                                # Build a new classifier for this exact
                                # combination of settings.
                                classifier = scikit_classifier.scikit_classifier(
                                    pos_window_size=pos_window_size,
                                    ngram_size=ngram_size,
                                    window_size=window_size,
                                    use_syntactic_features=
                                    use_syntactic_features,
                                    use_lesk=use_lesk,
                                    use_lesk_words=use_lesk_words)

                                # NOTE(review): both splits are re-read from
                                # disk on every iteration although the paths
                                # never change. Hoisting the loads above the
                                # loops is only safe if train()/predict() do
                                # not mutate the examples -- confirm.
                                # NOTE(review): 'valiation_split' looks like a
                                # misspelling of 'validation_split'; check the
                                # actual file name on disk before changing it.
                                egs = Parser.load_examples(
                                    'data/wsd-data/train_split.data')
                                test_egs = Parser.load_examples(
                                    'data/wsd-data/valiation_split.data')

                                classifier.train(egs)
                                # Float counters, reset for every
                                # configuration; presumably true/false
                                # positive/negative tallies filled in by the
                                # loop below (its body is outside this
                                # excerpt).
                                tp = 0.0
                                fp = 0.0
                                tn = 0.0
                                fn = 0.0
                                for eg in test_egs:
Пример #2
0
import Parser, scikit_classifier

if __name__ == '__main__':
    # Grid search: one classifier is built and trained for every combination
    # of the settings listed in the nested loops (3 * 1 * 4 * 6 * 1 * 1 = 72
    # runs in total).
    # takes forever to run
    # `f` is the search-result file; nothing in this excerpt writes to it yet.
    with open('data/output/searchresult.txt', 'w') as f:
        for ngram_size in [0,1,2]:
            # One-element lists (also use_lesk / use_lesk_words below) hold
            # that setting constant without changing the loop nesting.
            for use_syntactic_features in [0]:
                for pos_window_size in [0,1,5,10]:
                    for window_size in [0, 5, 10, 50, 100, 500]:
                        for use_lesk in [False]:
                            for use_lesk_words in [False]:
                                # Fresh classifier for the current settings.
                                classifier = scikit_classifier.scikit_classifier(
                                                pos_window_size = pos_window_size,
                                                ngram_size = ngram_size,
                                                window_size = window_size,
                                                use_syntactic_features = use_syntactic_features,
                                                use_lesk = use_lesk,
                                                use_lesk_words = use_lesk_words)
                            
                                # NOTE(review): the same two files are parsed
                                # again on every iteration; moving the loads
                                # out of the loops is safe only if the
                                # classifier never mutates the examples --
                                # confirm.
                                # NOTE(review): 'valiation_split' is probably
                                # a typo for 'validation_split'; verify the
                                # on-disk file name before touching it.
                                egs = Parser.load_examples('data/wsd-data/train_split.data')
                                test_egs = Parser.load_examples('data/wsd-data/valiation_split.data')
                            
                                classifier.train(egs)                        
                                # Float tallies, reset per configuration.
                                tp = 0.0
                                fp = 0.0
                                tn = 0.0
                                fn = 0.0
                                for eg in test_egs:
                                    # Predict one example at a time by passing
                                    # a single-element list.
                                    pred = classifier.predict([eg])
                                    # Walk eg.senses and the predictions in
                                    # lockstep; k is the pair's position.
                                    for (k,(s,p)) in enumerate(zip(eg.senses,pred)):
                                        # Both values are 1 -- presumably a
                                        # true positive (the branch body is
                                        # outside this excerpt).
                                        if s == 1 and p == 1:
Пример #3
0
    # Run `classifier` (set up earlier in this branch, outside this excerpt)
    # over the test data and write the results to data/output/<name>.txt.
    # NOTE: Parser.load_data was the loader used here before it was swapped
    # for Parser.load_test_data.
    testdata = Parser.load_test_data('data/wsd-data/test.data')
    with open('data/output/%s.txt' % name, 'w') as f:
        for testexample in testdata:
            # predict() receives a one-element list; every value it returns
            # is written on its own line, formatted with %d.
            prediction = classifier.predict([testexample])
            for element in prediction:
                f.write('%d\n' % element)
    # No explicit f.close(): leaving the with-block already closes the file.
    print("done")
elif task == 5:
    name = "scikit_test_ws500_pos1_ngram0_synfeat0_uselesk_useleskwords"
    classifier = scikit_classifier.scikit_classifier(
        window_size=500,
        use_syntactic_features=0,
        pos_window_size=1,
        ngram_size=0,
        use_lesk=True,
        use_lesk_words=True,
        training_file='data/wsd-data/train.data',
        test_file='data/wsd-data/test.data')
    egs = Parser.load_examples('data/wsd-data/train.data')
    test_egs = Parser.load_examples('data/wsd-data/test.data')
    
    # Train the classifier(s)
    classifier.train(egs)
    with open('data/output/%s.txt'%name, 'w') as f:
        for eg in test_egs:
            pred = classifier.predict([eg])
            for p in pred:
                f.write("%d\n"%p)
Пример #4
0
    # Feed the test data through `classifier` (configured earlier in this
    # branch, outside this excerpt) and save the output as
    # data/output/<name>.txt.
    # NOTE: this call replaced an earlier Parser.load_data(...) on the same
    # file.
    testdata = Parser.load_test_data('data/wsd-data/test.data')
    with open('data/output/%s.txt' % name, 'w') as f:
        for testexample in testdata:
            # One example per predict() call; each returned value becomes
            # one %d-formatted line.
            prediction = classifier.predict([testexample])
            for element in prediction:
                f.write('%d\n' % element)
    # The with-statement closes the file, so no f.close() call is needed.
    print("done")
elif task == 5:
    name = "scikit_test_ws500_pos1_ngram0_synfeat0_uselesk_useleskwords"
    classifier = scikit_classifier.scikit_classifier(
        window_size=500,
        use_syntactic_features=0,
        pos_window_size=1,
        ngram_size=0,
        use_lesk=True,
        use_lesk_words=True,
        training_file='data/wsd-data/train.data',
        test_file='data/wsd-data/test.data')
    egs = Parser.load_examples('data/wsd-data/train.data')
    test_egs = Parser.load_examples('data/wsd-data/test.data')

    # Train the classifier(s)
    classifier.train(egs)
    with open('data/output/%s.txt' % name, 'w') as f:
        for eg in test_egs:
            pred = classifier.predict([eg])
            for p in pred:
                f.write("%d\n" % p)