Пример #1
0
 def __init__(self, batch, train_kbest=None, train_gold=None, dev_kbest=None, dev_gold=None,
              test_kbest=None, test_gold=None, vocab_path=None):
     """Store the dataset arguments, obtain a vocabulary and read dev/train data.

     When ``vocab_path`` names an existing file, ``max_degree`` and the
     vocabulary are loaded from it with ``data_util.load_dict``.  Otherwise
     the vocabulary is built from ``train_gold`` with ``Vocab.Vocab``,
     ``max_degree`` comes from ``self.get_max_degree()``, and both are written
     to ``vocab_path`` when one was supplied.

     Args:
         batch: stored as ``self.batch``; not used further in this method.
         train_kbest, train_gold: handed to ``dev_reader.read_dev`` to fill
             ``self.train_data``; ``train_gold`` is also the input of
             ``Vocab.Vocab`` when the vocabulary has to be built.
         dev_kbest, dev_gold: handed to ``dev_reader.read_dev`` to fill
             ``self.dev_data``.
         test_kbest, test_gold: stored only; no test data is read here.
         vocab_path: location of the saved dictionary.  ``None`` (the
             default) means nothing is loaded from or saved to disk.
     """
     self.vocab = None
     self.batch = batch
     self.train_kbest = train_kbest
     self.train_gold = train_gold
     self.dev_kbest = dev_kbest
     self.dev_gold = dev_gold
     self.test_kbest = test_kbest
     self.test_gold = test_gold

     # vocab_path defaults to None and os.path.exists(None) raises TypeError,
     # so the filesystem is only probed when a path was actually given.
     has_vocab_path = vocab_path is not None
     if has_vocab_path and os.path.exists(vocab_path):
         print('load vocab')
         self.max_degree, self.vocab = data_util.load_dict(vocab_path)
     else:
         print('creat vocab')
         self.vocab = Vocab.Vocab(self.train_gold)
         print('get max_degree')
         self.max_degree = self.get_max_degree()
         # Without a path there is nowhere to persist the dictionary.
         if has_vocab_path:
             print('save dictionary')
             data_util.save_dict(self.vocab, self.max_degree, vocab_path)
     print('vocab size:' + str(self.vocab.size()))
     print('max_degree' + str(self.max_degree))

     print('get dev data')
     self.dev_data = dev_reader.read_dev(dev_kbest, dev_gold, self.vocab)
     print('number of dev:' + str(len(self.dev_data)))

     # Test data and the batched train iterator are intentionally not created
     # here; the train set is read with the same reader as the dev set.
     print('get train data')
     self.train_data = dev_reader.read_dev(train_kbest, train_gold, self.vocab)
     print('number of train:' + str(len(self.train_data)))
Пример #2
0
def test_model():
    """Sweep ``ratio`` over the dev data and print the best ratio and score.

    Loads the dictionary and the dev/test k-best and gold files, then calls
    the pair-wise or point-wise evaluator (selected by ``PAIR_WISE``) with
    ``ratio = 0.005 * i`` for ``i`` in ``0..199``.  The ratio whose first
    result element is the largest (and greater than 0) is printed together
    with that value.
    """
    max_degree, vocab = data_util.load_dict(os.path.join(DIR, OUTPUT_DICT))

    dev_kbest = os.path.join(DIR, DEV + '.kbest')
    dev_gold = os.path.join(DIR, DEV + '.gold')
    dev_data = dev_reader.read_dev(dev_kbest, dev_gold, vocab)

    # The test split is read but not evaluated in this function.
    test_kbest = os.path.join(DIR, TEST + '.kbest')
    test_gold = os.path.join(DIR, TEST + '.gold')
    test_data = dev_reader.read_dev(test_kbest, test_gold, vocab)

    print('model file name %s' % OUTPUT_MODEL)
    print('build model')
    print('load params')
    # Model construction and parameter loading are disabled; the evaluators
    # receive the placeholder value 0 instead of a model object.
    model = 0

    best_score = 0
    best_ratio = 0
    for step in range(200):
        ratio = 0.005 * step
        evaluate = evaluate_dataset_pair if PAIR_WISE else evaluate_dataset_point
        res = evaluate(model, dev_data, True, ratio=ratio)
        if res[0] > best_score:
            best_score = res[0]
            best_ratio = ratio
    print('%s %s' % (best_ratio, best_score))
Пример #3
0
def test_model():
    """Sweep ``ratio`` over the dev data and print the best ratio and score.

    Loads the dictionary and the dev/test k-best and gold files, builds the
    model, loads its saved parameters and calls ``evaluate_dataset`` on the
    dev data with ``ratio = 0.005 * i`` for ``i`` in ``0..199``.

    The best first result element and the ratio that produced it are printed
    after the sweep; without that report the outcome of the 200 evaluations
    would be discarded.
    """
    max_degree, vocab = data_util.load_dict(os.path.join(DIR, OUTPUT_DICT))
    dev_data = dev_reader.read_dev(os.path.join(DIR, DEV + '.kbest'),
                                   os.path.join(DIR, DEV + '.gold'), vocab)
    # The test split is read but not evaluated in this function.
    test_data = dev_reader.read_dev(os.path.join(DIR, TEST + '.kbest'),
                                    os.path.join(DIR, TEST + '.gold'), vocab)
    print('build model')
    model = dependency_model.get_model(vocab.size(), max_degree)
    print('load params')
    model.set_parmas(os.path.join(DIR, OUTPUT_MODEL))
    print('addbase')

    best_score = 0
    best_ratio = 0
    for step in range(200):
        ratio = 0.005 * step
        res = evaluate_dataset(model, dev_data, True, ratio=ratio)
        if res[0] > best_score:
            best_score = res[0]
            best_ratio = ratio
    # Report the winner of the sweep as "<ratio> <score>".
    print('%s %s' % (best_ratio, best_score))
Пример #4
0
def test_model():
    """Run the baseline helpers on the dev data, then evaluate the saved model.

    Loads the dictionary and the dev k-best/gold files, calls
    ``evaluate_baseline_random`` and ``evaluate_oracle_worst`` on the dev
    data, builds the model, loads its parameters and finally calls the
    pair-wise or point-wise evaluator depending on ``model.Pairwise``.
    """
    max_degree, vocab = data_util.load_dict(os.path.join(DIR, OUTPUT_DICT))

    kbest_path = os.path.join(DIR, DEV + '.kbest')
    gold_path = os.path.join(DIR, DEV + '.gold')
    dev_data = dev_reader.read_dev(kbest_path, gold_path, vocab)

    # Only the dev split is used; test-set loading/evaluation is disabled.
    evaluate_baseline_random(dev_data)
    evaluate_oracle_worst(dev_data)

    print('build model')
    model = dependency_model.get_model(
        vocab.size(), vocab.tagsize(), max_degree, PAIR_WISE)

    print('load params')
    params_path = os.path.join(DIR, OUTPUT_MODEL)
    model.set_parmas(params_path)

    if model.Pairwise:
        evaluate_dataset_pair(model, dev_data)
        return
    evaluate_dataset_point(model, dev_data, False)
Пример #5
0
def test_model():
    """Evaluate the saved model on the dev data under two settings.

    Loads the dictionary and the dev k-best/gold files, calls
    ``evaluate_oracle_worst`` on the dev data, builds the model and loads its
    parameters.  ``evaluate_dataset`` is then called twice on the dev data:
    first with ``True`` as third argument (after printing 'addbase'), then
    with ``False`` (after printing 'withoutbase').
    """
    max_degree, vocab = data_util.load_dict(os.path.join(DIR, OUTPUT_DICT))

    kbest_path = os.path.join(DIR, DEV + '.kbest')
    gold_path = os.path.join(DIR, DEV + '.gold')
    dev_data = dev_reader.read_dev(kbest_path, gold_path, vocab)

    # Only the dev split is used; test-set loading/evaluation is disabled.
    evaluate_oracle_worst(dev_data)

    print('build model')
    model = dependency_model.get_model(vocab.size(), max_degree)
    print('load params')
    params_path = os.path.join(DIR, OUTPUT_MODEL)
    model.set_parmas(params_path)

    for label, flag in (('addbase', True), ('withoutbase', False)):
        print(label)
        evaluate_dataset(model, dev_data, flag)
Пример #6
0
import dev_reader
import os
import parser_test
# Directory holding the k-best / gold files (trailing separator included).
DIR = 'd:\\MacShare\\32best\\'
# Base names of the data splits; '.kbest' and '.gold' are appended on use.
TRAIN = 'train'
DEV = 'dev32'
TEST = 'test32'

if __name__ == '__main__':
    # Run parser_test.evaluate_oracle_worst on the dev split, then on the
    # test split.  The reader is called with None as its third argument.
    for split in (DEV, TEST):
        kbest_path = os.path.join(DIR, split + '.kbest')
        gold_path = os.path.join(DIR, split + '.gold')
        split_data = dev_reader.read_dev(kbest_path, gold_path, None)
        parser_test.evaluate_oracle_worst(split_data)