# Build the word-embedding matrix: one row per entry of `forms`, `emb.dim`
# columns.  Each row is addressed by the index `forms` assigns to the word.
emb_mat = np.zeros((len(forms), emb.dim))
for w in forms:
    emb_mat[forms[w]] = emb[w]

# The two special entries are overwritten after the loop: out-of-vocabulary
# words get the vector stored under `emb.unk`, padding gets an all-zero row.
emb_mat[forms['-OOV-']] = emb[emb.unk].reshape(1, emb.dim)
emb_mat[forms['-PAD-']] = np.zeros((1, emb.dim))

print("constructing character list ...")
# Collect every distinct element produced by iterating the items of `words`
# (presumably the characters of each word string -- TODO confirm).
# The inventory is sorted because set iteration order for strings can change
# from one interpreter run to the next (hash randomization); an unsorted list
# would give the label a different index mapping on every run.
chars = Label('FORM', sorted({c for w in words for c in w}))

# UPOS inventory is the union of the training and validation tagsets.
# NOTE(review): the test corpus tagset is not merged in -- confirm that a tag
# occurring only in the test data is handled downstream.
upos_tags = training_corpus.tagset['UPOS'] | validation_corpus.tagset['UPOS']
# Sorted for the same reproducibility reason as the character inventory
# (assumes the tags are mutually comparable, e.g. all strings).
upos = Label('UPOS', sorted(upos_tags), discrete=True)

# NOTE(review): `batch_size` is not passed to any call visible here -- confirm
# whether it is still needed or should be handed to the generators.
batch_size = 10
tags = [upos]

# One data generator per corpus split, all sharing the same vocabularies.
training_generator = UDDataGenerator(training_corpus, forms, chars, tags)
validation_generator = UDDataGenerator(validation_corpus, forms, chars, tags)
test_generator = UDDataGenerator(test_corpus, forms, chars, tags)

tagger = Tagger(
    training_generator,
    validation_generator,
    test_generator,
    word_emb_mat=emb_mat,
    num_epochs=10,
)
tagger.fit()
# Model persistence is currently disabled.
##tagger.save_model()

# The second element of the evaluation result is reported as the accuracy.
score = tagger.evaluate(test_generator)
print("Accuracy on the test data: {0:1.4f}".format(score[1]))