with open(os.path.join(options.output, options.params), 'w') as paramsfp: pickle.dump((words, w2i, pos, rels, options), paramsfp) print 'Finished collecting vocab' print 'Initializing lstm mstparser:' parser = mstlstm.MSTParserLSTM(words, pos, rels, w2i, options) for epoch in xrange(options.epochs): print 'Starting epoch', epoch parser.Train(options.conll_train) conllu = (os.path.splitext( options.conll_dev.lower())[1] == '.conllu') devpath = os.path.join( options.output, 'dev_epoch_' + str(epoch + 1) + ('.conll' if not conllu else '.conllu')) utils.write_conll(devpath, parser.Predict(options.conll_dev)) parser.Save( os.path.join(options.output, os.path.basename(options.model) + str(epoch + 1))) if not conllu: os.system('perl conll/eval.pl -g ' + options.conll_dev + ' -s ' + devpath + ' > ' + devpath + '.txt') else: os.system( 'python conll/evaluation_script/conll17_ud_eval.py -v -w conll/evaluation_script/weights.clas ' + options.conll_dev + ' ' + devpath + ' > ' + devpath + '.txt')
devpath = os.path.join( options.output, 'dev_epoch_' + str(epoch + 1) + ('.conll' if not conllu else '.conllu')) utils.write_conll(devpath, parser.Predict(options.conll_dev)) if not conllu: os.system('perl src/utils/eval.pl -g ' + options.conll_dev + ' -s ' + devpath + ' > ' + devpath + '.txt') else: os.system( 'python src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas ' + options.conll_dev + ' ' + devpath + ' > ' + devpath + '.txt') print 'Finished predicting dev' parser.Save( os.path.join(options.output, options.model + str(epoch + 1))) else: with open(options.params, 'r') as paramsfp: words, w2i, pos, rels, stored_opt = pickle.load(paramsfp) stored_opt.external_embedding = options.external_embedding parser = ArcHybridLSTM(words, pos, rels, w2i, stored_opt) parser.Load(options.model) conllu = (os.path.splitext(options.conll_test.lower())[1] == '.conllu') tespath = os.path.join( options.output, 'test_pred.conll' if not conllu else 'test_pred.conllu') ts = time.time() pred = list(parser.Predict(options.conll_test)) te = time.time()
eId = 0 for epoch in xrange(options.epochs): print '\n-----------------\nStarting epoch', epoch + 1 if epoch % 10 == 0: if epoch == 0: parser.trainer.restart(learning_rate=0.001) elif epoch == 10: parser.trainer.restart(learning_rate=0.0005) else: parser.trainer.restart(learning_rate=0.00025) parser.train(options.conll_train) if options.conll_dev == "N/A": parser.Save(os.path.join(options.output, os.path.basename(options.model))) else: devPredSents = parser.predict(options.conll_dev) count = 0 correct = 0 for idSent, devSent in enumerate(devPredSents): conll_devSent = [entry for entry in devSent if isinstance(entry, utils.ConllEntry)] for entry in conll_devSent: if entry.id <= 0: continue if len(entry.predicted_sequence) == len(entry.decoder_input): all_equal = True for g,p in zip(entry.decoder_input, entry.predicted_sequence):
trainpath = os.path.join(options.output, 'train_epoch_%03d.conll' % (epoch+1)) utils.write_conll(trainpath, parser.Predict(options.conll_train)) trainWLAS = utils.runeval(options.conll_train, trainpath, verbose=False) print 'Finished predicting train' mean = float('NaN') if len(deltas): mean = sum(deltas)/len(deltas) delta = trainWLAS - devWLAS print "delta mean: %.2f, current delta (train - dev): %.2f" % (mean,delta) deltas.append(delta) parser.Save(os.path.join(options.output, "%s_%03d" % (options.model, (epoch+1)))) else: # predicting if WITHCPOS: with open(options.params, 'r') as paramsfp: words, w2i, pos, cpos, GENDER, NUMBER, PERSON, CASE, rels, stored_opt = pickle.load(paramsfp) else: with open(options.params, 'r') as paramsfp: words, w2i, pos, rels, stored_opt = pickle.load(paramsfp) print "Finished loading vocab" # overwrite options read from params.pickle stored_opt.external_embedding = options.external_embedding stored_opt.external_embedding_filter = options.external_embedding_filter stored_opt.external_embedding_filter_new = options.external_embedding_filter_new
print 'Initializing blstm srl:' parser = SRLLSTM(words, lemmas, pos, roles, chars, options) max_len = max([len(d) for d in train_data]) min_len = min([len(d) for d in train_data]) buckets = [list() for i in range(min_len, max_len)] for d in train_data: buckets[len(d) - min_len - 1].append(d) buckets = [x for x in buckets if x != []] for epoch in xrange(options.epochs): print 'Starting epoch', epoch parser.Train(utils.get_batches(buckets, parser, True)) if options.save_epoch: parser.Save( os.path.join(options.outdir, options.model + str(epoch + 1))) if options.conll_dev != '': start = time.time() utils.write_conll( os.path.join(options.outdir, options.model) + str(epoch + 1) + '.txt', parser.Predict(options.conll_dev)) os.system('perl src/utils/eval.pl -g ' + options.conll_dev + ' -s ' + os.path.join(options.outdir, options.model) + str(epoch + 1) + '.txt' + ' > ' + os.path.join(options.outdir, options.model) + str(epoch + 1) + '.eval &') print 'Finished predicting dev; time:', time.time() - start parser.Save(os.path.join(options.outdir, options.model))
'overall progress:' + str(round(100 * float(t) / options.t, 2)) + '% current progress:' + str(round(100 * float(i + 1) / len(mini_batches), 2)) + '% loss=' + str(closs / 10) + ' time: ' + str(time.time() - start) + '\n') if t % 100 == 0 and options.conll_dev: if options.eval_non_avg: uas, las = test(parser, dev_buckets, options.conll_dev, options.output + '/dev.out') print 'dev non-avg acc', las, uas if las > best_las: best_las = las print 'saving non-avg with', best_las, uas parser.Save(options.output + '/model') no_improvement = 0 else: no_improvement += 1 avg_model = mstlstm.MSTParserLSTM( pos, rels, w2i, chars, options, parser) uas, las = test(avg_model, dev_buckets, options.conll_dev, options.output + '/dev.out') print 'dev avg acc', las, uas if las > best_las: best_las = las print 'saving avg with', best_las, uas avg_model.Save(options.output + '/model') no_improvement = 0 else:
max_len = max([len(d) for d in train_data]) min_len = min([len(d) for d in train_data]) buckets = [list() for i in range(min_len, max_len)] for d in train_data: buckets[len(d) - min_len - 1].append(d) buckets = [x for x in buckets if x != []] for epoch in xrange(options.epochs): print 'Starting epoch', epoch best_acc = parser.Train( utils.get_batches(buckets, parser, True, options.sen_cut), epoch, best_acc, options) if options.conll_dev == None: parser.Save(os.path.join(options.outdir, options.model)) if options.input and options.output: with open(os.path.join(options.outdir, options.params), 'r') as paramsfp: words, pWords, plemmas, pos, roles, chars, sense_mask, stored_opt = pickle.load( paramsfp) stored_opt.external_embedding = options.external_embedding parser = SRLLSTM(words, pWords, plemmas, pos, roles, chars, sense_mask, stored_opt) parser.Load(os.path.join(options.outdir, options.model)) ts = time.time() pred = list(parser.Predict(options.input, sen_cut, use_default)) te = time.time() utils.write_conll(options.output, pred) print 'Finished predicting test', te - ts
'dev_epoch_' + str(epoch + 1) + '.conll') utils.write_conll(devpath, parser.Predict(options.conll_dev)) os.system('perl src/utils/eval.pl -g ' + options.conll_dev + ' -s ' + devpath + ' > ' + devpath + '.txt') must_save = False with open(devpath + '.txt') as evalf: las = float(evalf.readline().split()[-2]) if las > best_las: print 'New best LAS', las best_las = las best_epoch = epoch must_save = True print 'Finished predicting dev' if must_save: # Save disk space... parser.Save(os.path.join(options.output, "model")) if options.conll_test: print 'Using best model', "model" + str(best_epoch + 1), ' to run on test' parser.Load(os.path.join(options.output, "model")) tespath = os.path.join(options.output, 'test_pred.conll') ts = time.time() pred = list(parser.Predict(options.conll_test)) te = time.time() utils.write_conll(tespath, pred) os.system('perl src/utils/eval.pl -g ' + options.conll_test + ' -s ' + tespath + ' > ' + tespath + '.txt') with open(tespath + '.txt') as evalf: tlas = float(evalf.readline().split()[-2]) tuas = float(evalf.readline().split()[-2])
# 如果不是conllu格式 if not conllu: os.system('perl barchybrid/src/utils/eval.pl -g ' + options.conll_dev + ' -s ' + devpath + ' > ' + devpath + '.txt') # 是conllu格式 else: os.system( '/usr/bin/python barchybrid/src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas ' + options.conll_dev + ' ' + devpath + ' > ' + devpath + '.txt') print '预测发展集结束' # logger.info('预测发展集结束') parser.Save(options.model + str(epoch + 1)) # 测试过程,在测试集上 else: with open(options.params, 'r') as paramsfp: words, w2i, pos, rels, stored_opt = pickle.load(paramsfp) stored_opt.external_embedding = options.external_embedding parser = ArcHybridLSTM(words, pos, rels, w2i, stored_opt) for epoch in xrange(options.epochs): print '开始 第', epoch, '轮' # 从已经训练好的模型中载入模型 parser.Load(options.model + str(epoch + 1)) print '模型:' + options.model + str(epoch + 1) # 根据文件名后缀判断是否是conllu文件 conllu = (os.path.splitext(
with open(os.path.join(options.output, options.params), 'w') as paramsfp: pickle.dump((words, w2i, pos, rels, options), paramsfp) print 'Finished collecting vocab' print 'Initializing blstm arc hybrid:' parser = ArcHybridLSTM(words, pos, rels, w2i, options) for i, (epoch, train) in enumerate(zip(options.epochs.split(','), options.conll_train.split(',')), 1): for iepoch in range(1, int(epoch)+1): print 'Starting epoch', iepoch parser.Train(train) devpath = os.path.join(options.output, 'dev_epoch_' + str(i) + '_' + str(iepoch) + '.conll') utils.write_conll(devpath, parser.Predict(options.conll_dev)) os.system('perl src/utils/eval.pl -g ' + options.conll_dev + ' -s ' + devpath + ' > ' + devpath + '.txt &') print 'Finished predicting dev' parser.Save(os.path.join(options.output, options.model + '_' + str(i) + '_' + str(iepoch))) else: with open(options.params, 'r') as paramsfp: words, w2i, pos, rels, stored_opt = pickle.load(paramsfp) stored_opt.external_embedding = options.external_embedding parser = ArcHybridLSTM(words, pos, rels, w2i, stored_opt) parser.Load(options.model) tespath = os.path.join(options.output, 'test_pred.conll') ts = time.time() pred = parser.Predict(options.conll_test) te = time.time() utils.write_conll(tespath, pred) os.system('perl src/utils/eval.pl -g ' + options.conll_test + ' -s ' + tespath + ' > ' + tespath + '.txt &') print 'Finished predicting test',te-ts