Пример #1
0
        with open(os.path.join(options.output, options.params),
                  'w') as paramsfp:
            pickle.dump((words, w2i, pos, rels, options), paramsfp)
        print 'Finished collecting vocab'

        print 'Initializing lstm mstparser:'
        parser = mstlstm.MSTParserLSTM(words, pos, rels, w2i, options)

        for epoch in xrange(options.epochs):
            print 'Starting epoch', epoch
            parser.Train(options.conll_train)
            conllu = (os.path.splitext(
                options.conll_dev.lower())[1] == '.conllu')
            devpath = os.path.join(
                options.output, 'dev_epoch_' + str(epoch + 1) +
                ('.conll' if not conllu else '.conllu'))
            utils.write_conll(devpath, parser.Predict(options.conll_dev))
            parser.Save(
                os.path.join(options.output,
                             os.path.basename(options.model) + str(epoch + 1)))

            if not conllu:
                os.system('perl conll/eval.pl -g ' + options.conll_dev +
                          ' -s ' + devpath + ' > ' + devpath + '.txt')
            else:
                os.system(
                    'python conll/evaluation_script/conll17_ud_eval.py -v -w conll/evaluation_script/weights.clas '
                    + options.conll_dev + ' ' + devpath + ' > ' + devpath +
                    '.txt')
Пример #2
0
            devpath = os.path.join(
                options.output, 'dev_epoch_' + str(epoch + 1) +
                ('.conll' if not conllu else '.conllu'))
            utils.write_conll(devpath, parser.Predict(options.conll_dev))

            if not conllu:
                os.system('perl src/utils/eval.pl -g ' + options.conll_dev +
                          ' -s ' + devpath + ' > ' + devpath + '.txt')
            else:
                os.system(
                    'python src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas '
                    + options.conll_dev + ' ' + devpath + ' > ' + devpath +
                    '.txt')

            print 'Finished predicting dev'
            parser.Save(
                os.path.join(options.output, options.model + str(epoch + 1)))
    else:
        with open(options.params, 'r') as paramsfp:
            words, w2i, pos, rels, stored_opt = pickle.load(paramsfp)

        stored_opt.external_embedding = options.external_embedding

        parser = ArcHybridLSTM(words, pos, rels, w2i, stored_opt)
        parser.Load(options.model)
        conllu = (os.path.splitext(options.conll_test.lower())[1] == '.conllu')
        tespath = os.path.join(
            options.output,
            'test_pred.conll' if not conllu else 'test_pred.conllu')
        ts = time.time()
        pred = list(parser.Predict(options.conll_test))
        te = time.time()
Пример #3
0
    eId = 0
    for epoch in xrange(options.epochs):
        print '\n-----------------\nStarting epoch', epoch + 1

        if epoch % 10 == 0:
            if epoch == 0:
                parser.trainer.restart(learning_rate=0.001)
            elif epoch == 10:
                parser.trainer.restart(learning_rate=0.0005)
            else:
                parser.trainer.restart(learning_rate=0.00025)

        parser.train(options.conll_train)

        if options.conll_dev == "N/A":
            parser.Save(os.path.join(options.output, os.path.basename(options.model)))

        else:
            devPredSents = parser.predict(options.conll_dev)

            count = 0
            correct = 0
            for idSent, devSent in enumerate(devPredSents):
                conll_devSent = [entry for entry in devSent if isinstance(entry, utils.ConllEntry)]

                for entry in conll_devSent:
                    if entry.id <= 0:
                        continue
                    if len(entry.predicted_sequence) == len(entry.decoder_input):
                        all_equal = True
                        for g,p in zip(entry.decoder_input, entry.predicted_sequence):
Пример #4
0
            trainpath = os.path.join(options.output, 'train_epoch_%03d.conll' % (epoch+1))
            utils.write_conll(trainpath, parser.Predict(options.conll_train))

	    trainWLAS = utils.runeval(options.conll_train, trainpath, verbose=False)	
            print 'Finished predicting train'
	    mean = float('NaN')
	    if len(deltas):
		mean = sum(deltas)/len(deltas)

	    delta = trainWLAS - devWLAS
	    
	    print "delta mean: %.2f, current delta (train - dev): %.2f" % (mean,delta)
	    
	    deltas.append(delta)
            parser.Save(os.path.join(options.output, "%s_%03d" % (options.model, (epoch+1))))
    else:
	# predicting
	if WITHCPOS:
            with open(options.params, 'r') as paramsfp:
    	        words, w2i, pos, cpos, GENDER, NUMBER, PERSON, CASE, rels, stored_opt = pickle.load(paramsfp)
	else:
            with open(options.params, 'r') as paramsfp:
    	        words, w2i, pos, rels, stored_opt = pickle.load(paramsfp)
	
	print "Finished loading vocab"

	# overwrite options read from params.pickle
        stored_opt.external_embedding = options.external_embedding
	stored_opt.external_embedding_filter = options.external_embedding_filter
	stored_opt.external_embedding_filter_new = options.external_embedding_filter_new
Пример #5
0
        print 'Initializing blstm srl:'
        parser = SRLLSTM(words, lemmas, pos, roles, chars, options)

        # Group training instances by length: one bucket per length in
        # [min_len, max_len], i.e. max_len - min_len + 1 buckets, indexed by
        # len(d) - min_len.  Indexing from zero keeps the shortest instances in
        # their own bucket (an offset of -1 would wrap around to the last
        # bucket and mix them with the longest ones) and also works when every
        # instance has the same length.
        max_len = max([len(d) for d in train_data])
        min_len = min([len(d) for d in train_data])
        buckets = [list() for i in range(max_len - min_len + 1)]
        for d in train_data:
            buckets[len(d) - min_len].append(d)
        # Drop lengths that never occur so only non-empty buckets remain.
        buckets = [x for x in buckets if x != []]

        for epoch in xrange(options.epochs):
            print 'Starting epoch', epoch
            parser.Train(utils.get_batches(buckets, parser, True))
            if options.save_epoch:
                parser.Save(
                    os.path.join(options.outdir,
                                 options.model + str(epoch + 1)))
            if options.conll_dev != '':
                start = time.time()
                utils.write_conll(
                    os.path.join(options.outdir, options.model) +
                    str(epoch + 1) + '.txt', parser.Predict(options.conll_dev))
                os.system('perl src/utils/eval.pl -g ' + options.conll_dev +
                          ' -s ' +
                          os.path.join(options.outdir, options.model) +
                          str(epoch + 1) + '.txt' + ' > ' +
                          os.path.join(options.outdir, options.model) +
                          str(epoch + 1) + '.eval &')
                print 'Finished predicting dev; time:', time.time() - start
        parser.Save(os.path.join(options.outdir, options.model))
Пример #6
0
     'overall progress:' +
     str(round(100 * float(t) / options.t, 2)) +
     '% current progress:' +
     str(round(100 * float(i + 1) / len(mini_batches), 2)) +
     '% loss=' + str(closs / 10) + ' time: ' +
     str(time.time() - start) + '\n')
 if t % 100 == 0 and options.conll_dev:
     if options.eval_non_avg:
         uas, las = test(parser, dev_buckets,
                         options.conll_dev,
                         options.output + '/dev.out')
         print 'dev non-avg acc', las, uas
         if las > best_las:
             best_las = las
             print 'saving non-avg with', best_las, uas
             parser.Save(options.output + '/model')
             no_improvement = 0
         else:
             no_improvement += 1
     avg_model = mstlstm.MSTParserLSTM(
         pos, rels, w2i, chars, options, parser)
     uas, las = test(avg_model, dev_buckets,
                     options.conll_dev,
                     options.output + '/dev.out')
     print 'dev avg acc', las, uas
     if las > best_las:
         best_las = las
         print 'saving avg with', best_las, uas
         avg_model.Save(options.output + '/model')
         no_improvement = 0
     else:
Пример #7
0
        max_len = max([len(d) for d in train_data])
        min_len = min([len(d) for d in train_data])
        buckets = [list() for i in range(min_len, max_len)]
        for d in train_data:
            buckets[len(d) - min_len - 1].append(d)
        buckets = [x for x in buckets if x != []]

        for epoch in xrange(options.epochs):
            print 'Starting epoch', epoch
            best_acc = parser.Train(
                utils.get_batches(buckets, parser, True, options.sen_cut),
                epoch, best_acc, options)

        if options.conll_dev == None:
            parser.Save(os.path.join(options.outdir, options.model))

    if options.input and options.output:
        with open(os.path.join(options.outdir, options.params),
                  'r') as paramsfp:
            words, pWords, plemmas, pos, roles, chars, sense_mask, stored_opt = pickle.load(
                paramsfp)
        stored_opt.external_embedding = options.external_embedding
        parser = SRLLSTM(words, pWords, plemmas, pos, roles, chars, sense_mask,
                         stored_opt)
        parser.Load(os.path.join(options.outdir, options.model))
        ts = time.time()
        pred = list(parser.Predict(options.input, sen_cut, use_default))
        te = time.time()
        utils.write_conll(options.output, pred)
        print 'Finished predicting test', te - ts
Пример #8
0
                                   'dev_epoch_' + str(epoch + 1) + '.conll')
            utils.write_conll(devpath, parser.Predict(options.conll_dev))
            os.system('perl src/utils/eval.pl -g ' + options.conll_dev +
                      ' -s ' + devpath + ' > ' + devpath + '.txt')
            must_save = False
            with open(devpath + '.txt') as evalf:
                las = float(evalf.readline().split()[-2])
                if las > best_las:
                    print 'New best LAS', las
                    best_las = las
                    best_epoch = epoch
                    must_save = True

            print 'Finished predicting dev'
            if must_save:  # Save disk space...
                parser.Save(os.path.join(options.output, "model"))

        if options.conll_test:
            print 'Using best model', "model" + str(best_epoch +
                                                    1), ' to run on test'
            parser.Load(os.path.join(options.output, "model"))
            tespath = os.path.join(options.output, 'test_pred.conll')
            ts = time.time()
            pred = list(parser.Predict(options.conll_test))
            te = time.time()
            utils.write_conll(tespath, pred)
            os.system('perl src/utils/eval.pl -g ' + options.conll_test +
                      ' -s ' + tespath + ' > ' + tespath + '.txt')
            with open(tespath + '.txt') as evalf:
                tlas = float(evalf.readline().split()[-2])
                tuas = float(evalf.readline().split()[-2])
Пример #9
0
            # Not CoNLL-U format: score the dev predictions with the Perl
            # eval.pl script, redirecting its output to devpath + '.txt'.
            if not conllu:
                os.system('perl barchybrid/src/utils/eval.pl -g ' +
                          options.conll_dev + ' -s ' + devpath + ' > ' +
                          devpath + '.txt')
            # CoNLL-U format: score with conll17_ud_eval.py instead, writing to
            # the same devpath + '.txt' report file.
            # NOTE(review): the -w weights path below lacks the "barchybrid/"
            # prefix that the script path carries -- confirm this is intended.
            else:
                os.system(
                    '/usr/bin/python barchybrid/src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas '
                    + options.conll_dev + ' ' + devpath + ' > ' + devpath +
                    '.txt')

            print '预测发展集结束'
            # logger.info('预测发展集结束')
            parser.Save(options.model + str(epoch + 1))
    # Test phase: run on the test set
    else:
        with open(options.params, 'r') as paramsfp:
            words, w2i, pos, rels, stored_opt = pickle.load(paramsfp)

        stored_opt.external_embedding = options.external_embedding

        parser = ArcHybridLSTM(words, pos, rels, w2i, stored_opt)
        for epoch in xrange(options.epochs):
            print '开始 第', epoch, '轮'
            # Load the weights of an already-trained model
            parser.Load(options.model + str(epoch + 1))
            print '模型:' + options.model + str(epoch + 1)
            # Decide from the file-name extension whether this is a CoNLL-U file
            conllu = (os.path.splitext(
Пример #10
0
            with open(os.path.join(options.output, options.params), 'w') as paramsfp:
                pickle.dump((words, w2i, pos, rels, options), paramsfp)
            print 'Finished collecting vocab'

            print 'Initializing blstm arc hybrid:'
            parser = ArcHybridLSTM(words, pos, rels, w2i, options)

        for i, (epoch, train) in enumerate(zip(options.epochs.split(','), options.conll_train.split(',')), 1):
            for iepoch in range(1, int(epoch)+1):
                print 'Starting epoch', iepoch
                parser.Train(train)
                devpath = os.path.join(options.output, 'dev_epoch_' + str(i) + '_' + str(iepoch) + '.conll')
                utils.write_conll(devpath, parser.Predict(options.conll_dev))
                os.system('perl src/utils/eval.pl -g ' + options.conll_dev + ' -s ' + devpath  + ' > ' + devpath + '.txt &')
                print 'Finished predicting dev'
                parser.Save(os.path.join(options.output, options.model + '_' + str(i) + '_' + str(iepoch)))
    else:
        with open(options.params, 'r') as paramsfp:
            words, w2i, pos, rels, stored_opt = pickle.load(paramsfp)

        stored_opt.external_embedding = options.external_embedding

        parser = ArcHybridLSTM(words, pos, rels, w2i, stored_opt)
        parser.Load(options.model)
        tespath = os.path.join(options.output, 'test_pred.conll')
        ts = time.time()
        pred = parser.Predict(options.conll_test)
        te = time.time()
        utils.write_conll(tespath, pred)
        os.system('perl src/utils/eval.pl -g ' + options.conll_test + ' -s ' + tespath  + ' > ' + tespath + '.txt &')
        print 'Finished predicting test',te-ts