def dev_nlpcc(model):
    """Run ``model`` on the development split and return its score.

    The split is read from the module-level ``dev_file`` and parsed with the
    module-level ``nlp`` client.  ``model.test`` is run on the parsed
    instances, after which ``stst.Evaluation`` compares ``dev_file`` with
    ``model.output_file``.

    Args:
        model: Object exposing ``test(instances, file)`` and ``output_file``.

    Returns:
        The first of the four values returned by ``stst.Evaluation``
        (called ``acc`` in the original code; presumably accuracy).
    """
    parsed_dev = stst.load_parse_data(dev_file, nlp)
    model.test(parsed_dev, dev_file)

    # Only the first of the four evaluation values is reported.
    accuracy, _, _, _ = stst.Evaluation(dev_file, model.output_file)
    print(accuracy)
    return accuracy
def test_sts(model):
    """Run ``model`` on the STS benchmark test file and return its score.

    The test CSV is parsed with the module-level ``nlp`` client, fed to
    ``model.test``, and the file at ``model.output_file`` is then scored
    with ``stst.eval_output_file``.

    Args:
        model: Object exposing ``test(instances, file)`` and ``output_file``.

    Returns:
        Whatever ``stst.eval_output_file`` returns for the model output
        (named ``test_pearsonr`` originally; presumably a Pearson
        correlation).
    """
    sts_test_path = './data/stsbenchmark/sts-test.csv'

    parsed_test = stst.load_parse_data(sts_test_path, nlp)
    model.test(parsed_test, sts_test_path)

    score = stst.eval_output_file(model.output_file)
    print('Test:', score)
    return score
def dev_sts(model):
    """Run ``model`` on the STS benchmark dev file and return its score.

    The dev CSV is parsed with the module-level ``nlp`` client, fed to
    ``model.test``, and the file at ``model.output_file`` is then scored
    with ``stst.eval_output_file``.

    Args:
        model: Object exposing ``test(instances, file)`` and ``output_file``.

    Returns:
        Whatever ``stst.eval_output_file`` returns for the model output
        (named ``dev_pearsonr`` originally; presumably a Pearson
        correlation).
    """
    sts_dev_path = './data/stsbenchmark/sts-dev.csv'

    parsed_dev = stst.load_parse_data(sts_dev_path, nlp)
    model.test(parsed_dev, sts_dev_path)

    score = stst.eval_output_file(model.output_file)
    print('Dev:', score)
    return score
def feature_alation(model):
    """Evaluate ``model`` with all features, then with each feature alone.

    The module-level ``train_file`` and ``dev_file`` are loaded once.  The
    model is first trained and tested with its full ``feature_list``.  Then,
    for every feature in that list, ``model.feature_list`` is replaced by a
    one-element list, the model is retrained and retested, and the feature
    name and its score are printed.

    A stray ``exit(1)`` used to sit between the full-feature run and the
    loop, which made the per-feature loop unreachable and terminated the
    process with a failure status; it has been removed.

    Side effects:
        ``model.feature_list`` is reassigned on every iteration and is left
        holding only the last feature on return, matching the state the
        model was last trained with.

    Args:
        model: Object exposing ``feature_list``, ``train``, ``test`` and
            ``output_file``; each feature must expose ``feature_name``.

    Returns:
        A list of ``(feature_name, acc)`` pairs in evaluation order, where
        ``acc`` is the first of the four values from ``stst.Evaluation``.
    """
    # NOTE(review): unlike the sibling helpers, no NLP client is passed to
    # load_parse_data here -- presumably it relies on already-parsed data;
    # confirm against stst.load_parse_data.
    train_instances = stst.load_parse_data(train_file)
    dev_instances = stst.load_parse_data(dev_file)

    # Capture the full list before the loop starts overwriting it.
    feature_list = model.feature_list

    # Reference run with every feature enabled.
    model.train(train_instances, train_file)
    model.test(dev_instances, dev_file)

    results = []
    for feature in feature_list:
        model.feature_list = [feature]
        model.train(train_instances, train_file)
        model.test(dev_instances, dev_file)

        # evaluation
        acc, _, _, _ = stst.Evaluation(dev_file, model.output_file)
        print(feature.feature_name)
        print(acc)
        results.append((feature.feature_name, acc))

    return results
def stack_nlpcc(model):
    """Cross-validate a stacking model on the training split.

    Only models whose ``model_name`` contains ``'stack'`` are accepted.  The
    module-level ``train_file`` is parsed with the module-level ``nlp``
    client, ``model.cross_validation`` is run on it, and
    ``stst.Evaluation`` then compares ``train_file`` with
    ``model.output_file``.

    Args:
        model: Object exposing ``model_name``, ``cross_validation`` and
            ``output_file``.

    Returns:
        The first of the four values returned by ``stst.Evaluation``
        (called ``acc`` in the original code; presumably accuracy).

    Raises:
        NotImplementedError: If ``'stack'`` is not in ``model.model_name``.
    """
    is_stacking_model = 'stack' in model.model_name
    if not is_stacking_model:
        raise NotImplementedError

    parsed_train = stst.load_parse_data(train_file, nlp)
    model.cross_validation(parsed_train, train_file)

    # Only the first of the four evaluation values is reported.
    accuracy, _, _, _ = stst.Evaluation(train_file, model.output_file)
    print(accuracy)
    return accuracy
# Register the remaining feature extractors.  Each one is constructed and
# added before the next is built, in the order listed.
for _feature_cls, _feature_kwargs in (
    (stst.WeightednGramMatchFeature, {'type': 'lemma'}),
    (stst.BOWFeature, {'stopwords': False}),
    (stst.AlignmentFeature, {}),
    (stst.IdfAlignmentFeature, {}),
    (stst.NegativeFeature, {}),
):
    model.add(_feature_cls(**_feature_kwargs))

# Dataset locations.  These are module-level names, so functions in this
# file that refer to train_file / dev_file / test_file see these values.
train_file = './data/stsbenchmark/sts-train.csv'
dev_file = './data/stsbenchmark/sts-dev.csv'
test_file = './data/stsbenchmark/sts-test.csv'

# Start the Stanford NLP server first, then give its address here.
nlp = stst.StanfordNLP('http://localhost:9000')

# Parse the training and development splits.
train_instances = stst.load_parse_data(train_file, nlp)
dev_instances = stst.load_parse_data(dev_file, nlp)

# Fit on the training split, then predict on the development split.
model.train(train_instances, train_file)
model.test(dev_instances, dev_file)

# Score the development predictions written to model.output_file.
dev_pearsonr = stst.eval_output_file(model.output_file)
print('Dev:', dev_pearsonr)

# Repeat prediction and scoring on the held-out test split.
test_instances = stst.load_parse_data(test_file, nlp)
model.test(test_instances, test_file)
test_pearsonr = stst.eval_output_file(model.output_file)
print('Test:', test_pearsonr)
def test_nlpcc(model):
    """Run ``model`` on the test split and return its score.

    The split is read from the module-level ``test_file`` and parsed with
    the module-level ``nlp`` client.  ``model.test`` is run on the parsed
    instances, after which ``stst.Evaluation`` compares ``test_file`` with
    ``model.output_file``.

    Args:
        model: Object exposing ``test(instances, file)`` and ``output_file``.

    Returns:
        The first of the four values returned by ``stst.Evaluation``
        (called ``acc`` in the original code; presumably accuracy).
    """
    parsed_test = stst.load_parse_data(test_file, nlp)
    model.test(parsed_test, test_file)

    # Only the first of the four evaluation values is reported.
    accuracy, _, _, _ = stst.Evaluation(test_file, model.output_file)
    print(accuracy)
    return accuracy
def train_nlpcc(model):
    """Train ``model`` on the module-level ``train_file``.

    The file is parsed with the module-level ``nlp`` client and the parsed
    instances are handed to ``model.train`` together with the file path.

    Args:
        model: Object exposing ``train(instances, file)``.
    """
    parsed_train = stst.load_parse_data(train_file, nlp)
    model.train(parsed_train, train_file)
def train_sts(model):
    """Train ``model`` on the STS benchmark training file.

    The training CSV is parsed with the module-level ``nlp`` client and the
    parsed instances are handed to ``model.train`` together with the path.

    Args:
        model: Object exposing ``train(instances, file)``.
    """
    sts_train_path = './data/stsbenchmark/sts-train.csv'

    parsed_train = stst.load_parse_data(sts_train_path, nlp)
    model.train(parsed_train, sts_train_path)
def train_sts(model, train_file='./data/stsbenchmark/train_ai-lab.csv'):
    """Train ``model`` on an STS training file.

    The body used to assign ``train_file`` twice in a row, so the first
    value (``'./data/stsbenchmark/sts-train.csv'``) was a dead store and only
    the ``train_ai-lab.csv`` path was ever used.  That effective path is now
    the parameter default, so calling ``train_sts(model)`` behaves as
    before, and the benchmark file can be selected by passing it explicitly.

    Args:
        model: Object exposing ``train(instances, file)``.
        train_file: Path of the training CSV.  Defaults to
            ``'./data/stsbenchmark/train_ai-lab.csv'``.
    """
    # Preprocess the text data with StanfordNLP (tokenization, tagging,
    # syntactic parsing, and so on).
    train_instances = stst.load_parse_data(train_file, nlp)
    model.train(train_instances, train_file)