Exemplo n.º 1
0
def get_train_data_from_lang(lang):
    """Return the parsed training sentences for the requested language.

    ``lang`` is compared against 'swedish', 'danish' and 'english' (in that
    order). On a match, the corresponding ``dataset.get_<lang>_train_corpus()``
    loader is called and the result of its ``parsed_sents()`` is returned.

    Raises:
        ValueError: if ``lang`` matches none of the supported languages.
    """
    # (language, loader attribute on ``dataset``) pairs, in lookup order.
    # The loader is resolved only once a language has matched, so the
    # unsupported-language path never touches ``dataset``.
    loaders = (
        ('swedish', 'get_swedish_train_corpus'),
        ('danish', 'get_danish_train_corpus'),
        ('english', 'get_english_train_corpus'),
    )
    for language, loader_name in loaders:
        if lang == language:
            corpus = getattr(dataset, loader_name)()
            return corpus.parsed_sents()

    message = "Please don't use {}, only use english, swedish or danish".format(
        lang)
    raise ValueError(message)
Exemplo n.º 2
0
from featureextractor import FeatureExtractor
from transition import Transition

if __name__ == '__main__':
    print('Loading training sets... ')
    # EN_data = dataset.get_english_train_corpus().parsed_sents()
    # random.seed('english')
    # EN_subdata = random.sample(EN_data, 200)
    # print('EN')

    # SE_data = dataset.get_swedish_train_corpus().parsed_sents()
    # random.seed('swedish')
    # SE_subdata = random.sample(SE_data, 200)
    # print('SE')
    #
    DK_data = dataset.get_danish_train_corpus().parsed_sents()
    random.seed('danish')
    DK_subdata = random.sample(DK_data, 200)
    print('DK')

    try:
        # EN
        # print('Saving EN model... ')
        # tp = TransitionParser(Transition, FeatureExtractor)
        # tp.train(EN_subdata)
        # tp.save('english.model')
        # print('Ok')
        # print('Parsing dev corpus...')
        # EN_testdata = dataset.get_english_dev_corpus().parsed_sents()
        # EN_tp = TransitionParser.load('english.model')
        # EN_parsed = EN_tp.parse(EN_testdata)
Exemplo n.º 3
0
            tp = TransitionParser.load(modelfile)
            parsed = tp.parse(blinddata)

            ev = DependencyEvaluator(labeleddata, parsed)
            print "UAS: {} \nLAS: {}".format(*ev.eval())

            with open(conllfile, 'w') as f:
                for p in parsed:
                    f.write(p.to_conll(10).encode('utf-8'))
                    f.write('\n')
            print time.ctime(
            ), "-------DONE----- TESTING english ", modelfile, conllfile

        if F_TRAIN_DANISH == True:
            print time.ctime(), "START TRAIN danish"
            traindata = dataset.get_danish_train_corpus().parsed_sents()
            labeleddata = dataset.get_danish_dev_corpus().parsed_sents()
            blinddata = dataset.get_danish_dev_blind_corpus().parsed_sents()

            modelfile = 'danish.model'
            conllfile = 'danish.conll'

            tp = TransitionParser(Transition, FeatureExtractor)
            tp.train(traindata)
            tp.save(modelfile)

            # load model for testing
            tp = TransitionParser.load(modelfile)
            parsed = tp.parse(blinddata)

            ev = DependencyEvaluator(labeleddata, parsed)