# Fragment of a parser driver script (Python 2): the prediction branch and the
# start of the training branch. The `if` that owns the `else:` below starts
# before this view.
# NOTE(review): line structure was reconstructed from a whitespace-collapsed
# source line -- confirm nesting against the original file.
    # Restore the vocabulary and the options saved at training time.
    # NOTE(review): pickle.load can execute arbitrary code; only load params
    # files from a trusted source.
    with open(options.params, 'r') as paramsfp:
        words, w2i, pos, rels, stored_opt = pickle.load(paramsfp)

    # The embedding path from the current command line replaces the stored one.
    stored_opt.external_embedding = options.external_embedding

    print 'Initializing lstm mstparser:'
    parser = mstlstm.MSTParserLSTM(words, pos, rels, w2i, stored_opt)
    parser.Load(options.model)
    # The (lower-cased) extension of the test file selects both the output
    # file name and the evaluation script used below.
    conllu = (os.path.splitext(options.conll_test.lower())[1] == '.conllu')
    tespath = os.path.join(
        options.output,
        'test_pred.conll' if not conllu else 'test_pred.conllu')

    # list() forces the predictions to be materialised inside the timed span.
    ts = time.time()
    test_res = list(parser.Predict(options.conll_test))
    te = time.time()
    print 'Finished predicting test.', te - ts, 'seconds.'
    utils.write_conll(tespath, test_res)

    # Run the evaluator through the shell; its stdout is redirected to
    # tespath + '.txt'.
    if not conllu:
        os.system('perl conll/eval.pl -g ' + options.conll_test + ' -s ' +
                  tespath + ' > ' + tespath + '.txt')
    else:
        os.system(
            'python conll/evaluation_script/conll17_ud_eval.py -v -w conll/evaluation_script/weights.clas '
            + options.conll_test + ' ' + tespath + ' > ' + tespath + '.txt')
else:
    # Training branch: build the vocabulary from the training file.
    print 'Preparing vocab'
    words, w2i, pos, rels = utils.vocab(options.conll_train)
# Fragment of a joint NER / relation training script (Python 2). The loop that
# supplies `index` starts before this view.
# NOTE(review): line structure was reconstructed from a whitespace-collapsed
# source line -- confirm nesting against the original file.
        # Every sentence that is not in train_data is copied into the dev split,
        # together with its relation annotations.
        if index not in train_data:
            dev_data[index] = fulltrain_data[index]
            dev_id2arg2rel[index] = train_id2arg2rel[index]

#parser = learner.jNeRE(words, nertags, postagCount, rels, w2i, c2i, options)
parser = learner.jNeRE(words, nertags, rels, w2i, c2i, options)

for epoch in xrange(options.epochs):
    print '\n-----------------\nStarting epoch', epoch + 1
    #parser.Train(train_data, train_id2nerBILOU, id2arg2rel, classweights)
    parser.Train(train_data, train_id2nerBILOU, id2arg2rel)

    # Collect predicted and gold tag sequences for the dev sentences, in the
    # iteration order of predDev.
    label_pred = []
    label_correct = []
    predDev, relsDev = parser.Predict(dev_data)
    #pickle.dump((predDev, relsDev), open(options.output + "dev_ep" + str(epoch + 1), "wb"))
    for sentenceID in predDev:
        label_pred.append(predDev[sentenceID])
        # Gold tags are stored as one whitespace-separated string per sentence.
        label_correct.append(train_id2nerBILOU[sentenceID].strip().split())

    assert len(label_pred) == len(label_correct)

    # Score twice, differing only in the third argument ('O' vs 'B').
    # NOTE(review): presumably this is how invalid tag sequences are repaired
    # -- confirm against compute_NER_f1_macro.
    f1 = compute_NER_f1_macro(label_pred, label_correct, 'O', "IOBES")
    f1_b = compute_NER_f1_macro(label_pred, label_correct, 'B', "IOBES")

    if f1_b > f1:
        logging.debug(
            "Setting wrong tags to B- improves from %.4f to %.4f" % (f1, f1_b))
# Fragment of a parser driver script: the prediction branch and the start of
# the training branch. The `if` that owns the `else:` below starts before this
# view, and the final `with` statement continues past it.
# NOTE(review): line structure was reconstructed from a whitespace-collapsed
# source line -- confirm nesting against the original file.
    # Read the whole test file up front.
    with open(options.conll_test, 'r') as conllFP:
        devData = list(utils.read_conll(conllFP, parser.c2i))

    # Keep only ConllEntry items; anything else yielded by the reader is
    # dropped before prediction.
    conll_sentences = []
    for sentence in devData:
        conll_sentence = [
            entry for entry in sentence
            if isinstance(entry, utils.ConllEntry)
        ]
        conll_sentences.append(conll_sentence)

    tespath = os.path.join(options.output, options.conll_test_output)
    print('Predicting parsing dependencies')
    # list() forces the predictions to be materialised inside the timed span.
    ts = time.time()
    test_res = list(parser.Predict(conll_sentences, True))
    te = time.time()
    print('Finished in', te - ts, 'seconds.')
    utils.write_conll(tespath, test_res)
else:
    # Training branch: external-embedding vocabularies for train and dev,
    # then the main vocabulary from the training file.
    ext_words_train = utils.ext_vocab(options.conll_train,
                                      options.external_embedding_voc)
    ext_words_dev = utils.ext_vocab(options.conll_dev,
                                    options.external_embedding_voc)
    print('Extracting vocabulary')
    words, w2i, c2i, pos, xpos, rels = utils.vocab(options.conll_train)

    # (statement continues beyond this view)
    with open(os.path.join(options.output, options.params),
with open(options.params, 'r') as paramsfp: words, w2i, pos, rels, stored_opt = pickle.load(paramsfp) stored_opt.external_embedding = options.external_embedding print 'Initializing lstm mstparser:' parser = mstlstm.MSTParserLSTM(words, pos, rels, w2i, stored_opt) parser.Load(options.model) conllu = (os.path.splitext(options.conll_test.lower())[1] == '.conllu') tespath = os.path.join( options.output, 'test_pred.conll' if not conllu else 'test_pred.conllu') ts = time.time() test_res = list(parser.Predict(options.conll_test, options.batch_size)) te = time.time() print 'Finished predicting test.', te - ts, 'seconds.' utils.write_conll(tespath, test_res) if not conllu: os.system('perl src/utils/eval.pl -g ' + options.conll_test + ' -s ' + tespath + ' > ' + tespath + '.txt') else: os.system( 'python src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas ' + options.conll_test + ' ' + tespath + ' > ' + testpath + '.txt') else: print 'Preparing vocab' words, w2i, pos, rels = utils.vocab(options.conll_train)
# Fragment of an SRL driver script (Python 2): bucketed training followed by
# the start of the prediction section. `buckets`, `min_len` and `parser` are
# created before this view.
# NOTE(review): line structure was reconstructed from a whitespace-collapsed
# source line -- confirm nesting against the original file.
    # Distribute training items over the length buckets, then drop empty ones.
    # NOTE(review): an item with len(d) == min_len gets index -1, i.e. the
    # last bucket -- confirm this is intended against how `buckets` is sized.
    for d in train_data:
        buckets[len(d) - min_len - 1].append(d)
    buckets = [x for x in buckets if x != []]

    for epoch in xrange(options.epochs):
        print 'Starting epoch', epoch
        parser.Train(utils.get_batches(buckets, parser, True))

        # Optional per-epoch checkpoint, suffixed with the 1-based epoch number.
        if options.save_epoch:
            parser.Save(
                os.path.join(options.outdir, options.model + str(epoch + 1)))

        if options.conll_dev != '':
            start = time.time()
            utils.write_conll(
                os.path.join(options.outdir, options.model) + str(epoch + 1) +
                '.txt', parser.Predict(options.conll_dev))
            # The trailing '&' backgrounds the evaluator in the shell, so the
            # time printed below does not include the evaluation itself.
            os.system('perl src/utils/eval.pl -g ' + options.conll_dev +
                      ' -s ' + os.path.join(options.outdir, options.model) +
                      str(epoch + 1) + '.txt' + ' > ' +
                      os.path.join(options.outdir, options.model) +
                      str(epoch + 1) + '.eval &')
            print 'Finished predicting dev; time:', time.time() - start
    # Final model, saved under the un-suffixed name.
    parser.Save(os.path.join(options.outdir, options.model))

if options.input and options.output:
    # Prediction: restore vocabularies and stored options, then rebuild the
    # model.
    # NOTE(review): pickle.load can execute arbitrary code; only load params
    # files from a trusted source.
    with open(options.outdir + '/' + options.params, 'r') as paramsfp:
        words, lemmas, pos, roles, chars, stored_opt = pickle.load(
            paramsfp)
    stored_opt.external_embedding = options.external_embedding
    parser = SRLLSTM(words, lemmas, pos, roles, chars, stored_opt)
# Fragment of a joint POS-tagging / dependency-parsing driver (Python 2): the
# tail of the prediction branch and the start of the training branch. The
# `if` that owns the `else:` starts before this view, and the final `if`
# header's body lies beyond it.
# NOTE(review): line structure was reconstructed from a whitespace-collapsed
# source line -- confirm nesting against the original file.
    # No external embedding is used at prediction time.
    stored_opt.external_embedding = None
    print 'Loading pre-trained model'
    parser = learner.jPosDepLearner(words, pos, rels, w2i, c2i, caps, stored_opt)
    parser.Load(options.model)

    testoutpath = os.path.join(options.output, options.conll_test_output)
    print 'Predicting POS tags and parsing dependencies'
    #ts = time.time()
    #test_pred = list(parser.Predict(options.conll_test))
    #te = time.time()
    #print 'Finished in', te-ts, 'seconds.'
    #utils.write_conll(testoutpath, test_pred)

    # Write predictions sentence by sentence: one entry per line, followed by
    # a blank line after each sentence.
    with open(testoutpath, 'w') as fh:
        for sentence in parser.Predict(options.conll_test):
            # Echoes every predicted sentence to stdout.
            print sentence
            # The first entry of each sentence is skipped
            # (presumably the artificial root -- TODO confirm).
            for entry in sentence[1:]:
                fh.write(str(entry) + '\n')
            fh.write('\n')
else:
    print("Training file: " + options.conll_train)
    if options.conll_dev != "N/A":
        print("Development file: " + options.conll_dev)

    highestScore = 0.0
    eId = 0
    # flag1 is set to 1 here and tested against 0 below, so the condition
    # that follows can never be true as written.
    flag1 = 1
    if os.path.isfile(os.path.join(options.output, options.params)) and \
            os.path.isfile(os.path.join(options.output, os.path.basename(options.model))) and flag1==0 :
# Fragment of an arc-hybrid parser training script (Python 2). The view ends
# inside a run of commented-out evaluation code.
# NOTE(review): line structure was reconstructed from a whitespace-collapsed
# source line -- confirm nesting against the original file.
print 'Initializing blstm arc hybrid:'
# WITHCPOS selects the constructor variant that also receives cpos and the
# GENDER / NUMBER / PERSON / CASE arguments.
if WITHCPOS:
    print "Using features as well"
    #print "ff", GENDER, NUMBER, PERSON
    parser = ArcHybridLSTM(words, pos, cpos, GENDER, NUMBER, PERSON, CASE, rels, w2i, options)
else:
    parser = ArcHybridLSTM(words, pos, rels, w2i, options)
deltas = []
for epoch in xrange(options.epochs):
    print '\n================\nStarting epoch', epoch+1
    parser.Train(options.conll_train, epoch)
    #devpath = os.path.join(options.output, 'dev_epoch_' + str(epoch+1) + '.conll')
    # Zero-padded, 1-based epoch number in the dev prediction file name.
    devpath = os.path.join(options.output, 'dev_epoch_%03d.conll' % (epoch+1))
    utils.write_conll(devpath, parser.Predict(options.conll_dev))

    # run evaluation
    #command = 'perl src/utils/eval.pl -g ' + options.conll_dev + ' -s ' + devpath + ' > ' + devpath + '.txt '
    #print "executing: %s" % command
    #os.system(command)
    # just show current LAS
    #ifp = open(devpath + '.txt')
    #print "current LAS", ifp.readline()
    #ifp.close()
    # command = "~/bin/toolbin/conll/evaluation_script/conll17_ud_eval.py --weights ~/bin/toolbin/conll/evaluation_script/weights.clas " + options.conll_dev + " " + devpath + " > " + devpath + '.txt4'
    # print "executing: %s" % command
    # os.system(command)
    # # just show current LAS
    # ifp = open(devpath + '.txt4')
# Fragment of a joint POS-tagging / dependency-parsing driver with
# morphological inputs: the prediction branch and the start of the training
# branch. The `if` that owns the `else:` below starts before this view.
# NOTE(review): line structure was reconstructed from a whitespace-collapsed
# source line -- confirm nesting against the original file.
    # Restore vocabularies, the morphology dictionary and the stored options.
    # NOTE(review): pickle.load can execute arbitrary code; only load params
    # files from a trusted source.
    with open(os.path.join(options.output, options.params), 'rb') as paramsfp:
        words, w2i, c2i, m2i, t2i, morph_dict, pos, rels, stored_opt = pickle.load(
            paramsfp)

    # No external embedding is used at prediction time.
    stored_opt.external_embedding = None

    print('Loading pre-trained model')
    parser = learner.jPosDepLearner(words, pos, rels, w2i, c2i, m2i, t2i,
                                    morph_dict, stored_opt)
    parser.Load(os.path.join(options.output, options.model))

    testoutpath = os.path.join(options.output, options.conll_test_output)
    print('Predicting POS tags and parsing dependencies')
    # Write predictions sentence by sentence: one entry per line, followed by
    # a blank line after each sentence.
    with open(testoutpath, 'w') as fh:
        for sentence in parser.Predict(options.conll_test):
            # The first entry of each sentence is skipped
            # (presumably the artificial root -- TODO confirm).
            for entry in sentence[1:]:
                fh.write(str(entry) + '\n')
            fh.write('\n')
else:
    print("Training file: " + options.conll_train)

    highestScore = 0.0
    eId = 0

    # Training branch: the morphology dictionary is built first because the
    # vocabulary extraction takes it as an argument.
    print('Extracting vocabulary')
    morph_dict = utils.get_morph_dict(options.segmentation_path,
                                      options.lowerCase)
    words, w2i, c2i, m2i, t2i, pos, rels = utils.vocab(
        options.conll_train, morph_dict)
# Fragment of a joint POS-tagging / dependency-parsing driver (Python 2):
# prediction branch plus the start of the training branch. The final `with`
# header's body lies beyond this view.
# NOTE(review): line structure was reconstructed from a whitespace-collapsed
# source line -- confirm nesting against the original file.
print 'Using external embedding:', options.external_embedding

if options.predictFlag:
    # Restore the vocabularies and the options saved at training time.
    # NOTE(review): pickle.load can execute arbitrary code; only load params
    # files from a trusted source.
    with open(options.params, 'r') as paramsfp:
        words, w2i, c2i, pos, rels, stored_opt = pickle.load(paramsfp)

    # The embedding path from the current command line replaces the stored one.
    stored_opt.external_embedding = options.external_embedding

    print 'Loading pre-trained joint model'
    parser = learner.jPosDepLearner(words, pos, rels, w2i, c2i, stored_opt)
    parser.Load(options.model)

    tespath = os.path.join(options.output, options.conll_test_output)
    print 'Predicting POS tags and parsing dependencies'
    # list() forces the predictions to be materialised inside the timed span.
    ts = time.time()
    test_res = list(parser.Predict(options.conll_test))
    te = time.time()
    print 'Finished in', te - ts, 'seconds.'
    utils.write_conll(tespath, test_res)

    # Evaluation is disabled in this version; the commands are kept for
    # reference only.
    #conllu = (os.path.splitext(options.conll_test.lower())[1] == '.conllu')
    #if not conllu:#Scored with punctuation
    #    os.system('perl utils/eval07.pl -q -g ' + options.conll_test + ' -s ' + tespath + ' > ' + tespath + '.scores.txt')
    #else:
    #    os.system('python utils/evaluation_script/conll17_ud_eval.py -v -w utils/evaluation_script/weights.clas ' + options.conll_test + ' ' + tespath + ' > ' + tespath + '.scores.txt')
else:
    # Training branch: build the vocabulary from the training file.
    print 'Extracting vocabulary'
    words, w2i, c2i, pos, rels = utils.vocab(options.conll_train)

    # (body of this `with` lies beyond this view)
    with open(os.path.join(options.output, options.params), 'w') as paramsfp:
#print 'Finished predicting dev' print "Total time:", total_time, "words/sec:", nwords / total_time, "sents/sec:", nsents / total_time else: with open(options.params, 'r') as paramsfp: words, w2i, pos, rels, stored_opt = pickle.load(paramsfp) stored_opt.external_embedding = options.external_embedding parser = ArcHybridLSTM(words, pos, rels, w2i, stored_opt) parser.Load(options.model) conllu = (os.path.splitext(options.conll_test.lower())[1] == '.conllu') tespath = os.path.join( options.output, 'test_pred.conll' if not conllu else 'test_pred.conllu') ts = time.time() pred = list(parser.Predict(options.conll_test, options.batch_size)) te = time.time() pred_time = te - ts nsents = len(pred) nwords = sum([len(s) for s in pred]) print pred_time, "sents/sec:", nsents / pred_time, "words/sec:", nwords / pred_time print nsents, nwords utils.write_conll(tespath, pred) #if not conllu: # os.system('perl src/utils/eval.pl -g ' + options.conll_test + ' -s ' + tespath + ' > ' + tespath + '.txt') #else: # os.system('python src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas ' + options.conll_test + ' ' + tespath + ' > ' + testpath + '.txt') print 'Finished predicting test', te - ts
epoch, best_acc, options) if options.conll_dev == None: parser.Save(os.path.join(options.outdir, options.model)) if options.input and options.output: with open(os.path.join(options.outdir, options.params), 'r') as paramsfp: words, pWords, plemmas, pos, roles, chars, sense_mask, stored_opt = pickle.load( paramsfp) stored_opt.external_embedding = options.external_embedding parser = SRLLSTM(words, pWords, plemmas, pos, roles, chars, sense_mask, stored_opt) parser.Load(os.path.join(options.outdir, options.model)) ts = time.time() pred = list(parser.Predict(options.input, sen_cut, use_default)) te = time.time() utils.write_conll(options.output, pred) print 'Finished predicting test', te - ts if options.inputdir and options.outputdir: with open(os.path.join(options.outdir, options.params), 'r') as paramsfp: words, pWords, plemmas, pos, roles, chars, sense_mask, stored_opt = pickle.load( paramsfp) stored_opt.external_embedding = options.external_embedding parser = SRLLSTM(words, pWords, plemmas, pos, roles, chars, sense_mask, stored_opt) parser.Load(os.path.join(options.outdir, options.model)) ts = time.time() for dir, subdir, files in os.walk(options.inputdir):
max_len = max([len(d) for d in train_data]) min_len = min([len(d) for d in train_data]) buckets = [list() for i in range(min_len, max_len)] for d in train_data: buckets[len(d) - min_len - 1].append(d) buckets = [x for x in buckets if x != []] for epoch in xrange(options.epochs): print 'Starting epoch', epoch print 'best F-score before starting the epoch: ' + str( best_f_score) best_f_score = parser.Train( utils.get_batches(buckets, parser, True), epoch, best_f_score, options) print 'best F-score after finishing the epoch: ' + str( best_f_score) if options.input and options.output: with open(os.path.join(options.outdir, options.params), 'r') as paramsfp: words, lemmas, pos, roles, chars, stored_opt = pickle.load( paramsfp) stored_opt.external_embedding = options.external_embedding parser = SRLLSTM(words, lemmas, pos, roles, chars, stored_opt) parser.Load(os.path.join(options.outdir, options.model)) print 'loaded the model' ts = time.time() pred = list(parser.Predict(options.input)) te = time.time() utils.write_conll(options.output, pred) print 'Finished predicting test', te - ts
print 'Using external embedding:', options.external_embedding if options.predictFlag: with open(os.path.join(options.outdir, options.params), 'r') as paramsfp: words, w2i, c2i, pos, rels, morphs, stored_opt = pickle.load(paramsfp) stored_opt.external_embedding = options.external_embedding stored_opt.pretrain_wembed = options.pretrain_wembed print 'Loading pre-trained joint model' parser = learner.jPosDepLearner(words, pos, rels, morphs, w2i, c2i, stored_opt) parser.Load(os.path.join(options.outdir, os.path.basename(options.model))) conllu = (os.path.splitext(options.conll_test.lower())[1] == '.conllu') tespath = os.path.join(options.outdir, stored_opt.model + 'test_pred.conllu') print 'Predicting POS tags and parsing dependencies' devPredSents = parser.Predict(options.conll_test) te = time.time() print 'Finished in', te-ts, 'seconds.' utils.write_conll(tespath, test_res) if not conllu:#Scored with punctuation os.system('perl utils/eval07.pl -q -g ' + options.conll_test + ' -s ' + tespath + ' > ' + tespath + '.scores.txt') else: os.system('python utils/evaluation_script/conll17_ud_eval.py -v -w utils/evaluation_script/weights.clas ' + options.conll_gold + ' ' + tespath + ' > ' + tespath + '.scores.txt') else: if os.path.isfile(os.path.join(options.outdir, options.params)): print("Load existed vocabulary.") with open(os.path.join(options.outdir, options.params), 'r') as paramsfp: words, w2i, c2i, pos, rels, morphs, stored_opt = pickle.load(paramsfp) else:
# Fragment of an arc-hybrid parser driver (Python 2): the training branch and
# the start of the prediction branch. The `if` that owns the `else:` below
# starts before this view, and the prediction branch continues past it.
# NOTE(review): line structure was reconstructed from a whitespace-collapsed
# source line -- confirm nesting against the original file.
    print 'Preparing vocab'
    words, w2i, pos, rels = utils.vocab(options.conll_train)

    # Persist the vocabulary together with the options so that prediction can
    # rebuild the same model later.
    with open(os.path.join(options.output, options.params), 'w') as paramsfp:
        pickle.dump((words, w2i, pos, rels, options), paramsfp)
    print 'Finished collecting vocab'

    print 'Initializing blstm arc hybrid:'
    parser = ArcHybridLSTM(words, pos, rels, w2i, options)

    # options.epochs and options.conll_train are comma-separated lists that
    # are paired up position by position: stage i (1-based) trains on its own
    # file for its own number of epochs. zip() stops at the shorter list.
    for i, (epoch, train) in enumerate(zip(options.epochs.split(','), options.conll_train.split(',')), 1):
        for iepoch in range(1, int(epoch)+1):
            print 'Starting epoch', iepoch
            parser.Train(train)
            devpath = os.path.join(options.output, 'dev_epoch_' + str(i) + '_' + str(iepoch) + '.conll')
            utils.write_conll(devpath, parser.Predict(options.conll_dev))
            # The trailing '&' backgrounds the evaluator in the shell.
            os.system('perl src/utils/eval.pl -g ' + options.conll_dev + ' -s ' + devpath + ' > ' + devpath + '.txt &')
            print 'Finished predicting dev'
            # One checkpoint per (stage, epoch) pair.
            parser.Save(os.path.join(options.output, options.model + '_' + str(i) + '_' + str(iepoch)))
else:
    # Restore the vocabulary and the options saved at training time.
    # NOTE(review): pickle.load can execute arbitrary code; only load params
    # files from a trusted source.
    with open(options.params, 'r') as paramsfp:
        words, w2i, pos, rels, stored_opt = pickle.load(paramsfp)

    # The embedding path from the current command line replaces the stored one.
    stored_opt.external_embedding = options.external_embedding

    parser = ArcHybridLSTM(words, pos, rels, w2i, stored_opt)
    parser.Load(options.model)
    tespath = os.path.join(options.output, 'test_pred.conll')
    # NOTE(review): if Predict returns a lazy generator, te - ts does not
    # include the actual prediction work -- confirm against Predict.
    ts = time.time()
    pred = parser.Predict(options.conll_test)
    te = time.time()