def get_train_data_from_lang(lang):
    """Return the parsed sentences of the training corpus for ``lang``.

    The supported values are ``'swedish'``, ``'danish'`` and ``'english'``;
    each one maps to the matching ``dataset.get_<lang>_train_corpus()``
    loader, whose ``parsed_sents()`` result is returned.

    Raises:
        ValueError: if ``lang`` is not one of the three supported names.
    """
    # (language name, name of the loader function on the ``dataset`` module)
    corpus_loaders = (
        ('swedish', 'get_swedish_train_corpus'),
        ('danish', 'get_danish_train_corpus'),
        ('english', 'get_english_train_corpus'),
    )

    # Plain equality checks (rather than a dict lookup) so that any value of
    # ``lang`` that matches nothing ends up in the ValueError below.
    for supported_name, loader_name in corpus_loaders:
        if lang == supported_name:
            load_corpus = getattr(dataset, loader_name)
            return load_corpus().parsed_sents()

    message = "Please don't use {}, only use english, swedish or danish"
    raise ValueError(message.format(lang))
from featureextractor import FeatureExtractor from transition import Transition if __name__ == '__main__': print('Loading training sets... ') # EN_data = dataset.get_english_train_corpus().parsed_sents() # random.seed('english') # EN_subdata = random.sample(EN_data, 200) # print('EN') # SE_data = dataset.get_swedish_train_corpus().parsed_sents() # random.seed('swedish') # SE_subdata = random.sample(SE_data, 200) # print('SE') # DK_data = dataset.get_danish_train_corpus().parsed_sents() random.seed('danish') DK_subdata = random.sample(DK_data, 200) print('DK') try: # EN # print('Saving EN model... ') # tp = TransitionParser(Transition, FeatureExtractor) # tp.train(EN_subdata) # tp.save('english.model') # print('Ok') # print('Parsing dev corpus...') # EN_testdata = dataset.get_english_dev_corpus().parsed_sents() # EN_tp = TransitionParser.load('english.model') # EN_parsed = EN_tp.parse(EN_testdata)
tp = TransitionParser.load(modelfile) parsed = tp.parse(blinddata) ev = DependencyEvaluator(labeleddata, parsed) print "UAS: {} \nLAS: {}".format(*ev.eval()) with open(conllfile, 'w') as f: for p in parsed: f.write(p.to_conll(10).encode('utf-8')) f.write('\n') print time.ctime( ), "-------DONE----- TESTING english ", modelfile, conllfile if F_TRAIN_DANISH == True: print time.ctime(), "START TRAIN danish" traindata = dataset.get_danish_train_corpus().parsed_sents() labeleddata = dataset.get_danish_dev_corpus().parsed_sents() blinddata = dataset.get_danish_dev_blind_corpus().parsed_sents() modelfile = 'danish.model' conllfile = 'danish.conll' tp = TransitionParser(Transition, FeatureExtractor) tp.train(traindata) tp.save(modelfile) # load model for testing tp = TransitionParser.load(modelfile) parsed = tp.parse(blinddata) ev = DependencyEvaluator(labeleddata, parsed)